diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c
index da2774ba8c..439f721964 100644
--- a/ext/nokogiri/xml_schema.c
+++ b/ext/nokogiri/xml_schema.c
@@ -133,6 +133,31 @@ static VALUE read_memory(VALUE klass, VALUE content)
   return rb_schema;
 }
 
+/* Schema creation will remove and deallocate "blank" nodes. If any of those
+ * nodes have been exposed to Ruby, they could get freed out from under the
+ * VALUE pointer. Returns 1 if any entry of `cache` wraps a node for which
+ * xmlIsBlankNode() is true; returns 0 if `cache` is nil or has no such entry.
+ */
+static int has_blank_nodes_p(VALUE cache)
+{
+  long i;
+
+  if (NIL_P(cache)) {
+    return 0;
+  }
+
+  for (i = 0; i < RARRAY_LEN(cache); i++) {
+    xmlNodePtr node;
+    VALUE element = rb_ary_entry(cache, i);
+    Data_Get_Struct(element, xmlNode, node); /* unwrap the xmlNode held by the Ruby object */
+    if (xmlIsBlankNode(node)) {
+      return 1; /* one exposed blank node is enough; stop scanning */
+    }
+  }
+
+  return 0;
+}
+
 /*
  * call-seq:
  * from_document(doc)
@@ -152,6 +177,10 @@ static VALUE from_document(VALUE klass, VALUE document)
   /* In case someone passes us a node. ugh. */
   doc = doc->doc;
 
+  if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
+    rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
+  }
+
   ctx = xmlSchemaNewDocParserCtxt(doc);
 
   errors = rb_ary_new();
diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb
index 0ad03aa3eb..eaf3442ee5 100644
--- a/test/xml/test_schema.rb
+++ b/test/xml/test_schema.rb
@@ -7,6 +7,34 @@ def setup
         assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
       end
 
+      def test_segv
+        skip("Pure Java version shouldn't have this bug") unless Nokogiri.uses_libxml?
+
+        # This is a test for a workaround for a bug in LibXML2. The upstream
+        # bug is here: https://gitlab.gnome.org/GNOME/libxml2/issues/148
+        # Schema creation can result in dangling pointers. If no nodes have
+        # been exposed, it should be fine to create a schema. If blank nodes
+        # have been exposed to Ruby, from_document must raise ArgumentError.
+        # NOTE(review): heredoc body below is blank in this copy - XSD markup lost? Confirm.
+        doc = <<~doc
+
+
+        doc
+
+        # This is OK, no nodes have been exposed
+        xsd_doc = Nokogiri::XML(doc)
+        assert Nokogiri::XML::Schema.from_document(xsd_doc)
+
+        # This is not OK, nodes have been exposed to Ruby
+        xsd_doc = Nokogiri::XML(doc)
+        node = xsd_doc.root.children.find(&:blank?) # Finds a node
+
+        ex = assert_raise(ArgumentError) do
+          Nokogiri::XML::Schema.from_document(xsd_doc)
+        end
+        assert_match(/blank nodes/, ex.message)
+      end
+
       def test_schema_from_document
         doc = Nokogiri::XML(File.open(PO_SCHEMA_FILE))
         assert doc