Skip to content

Commit

Permalink
support upstream libxml2 master up to GNOME/libxml2@05c147c3 (#3161)
Browse files Browse the repository at this point in the history
**What problem is this PR intended to solve?**

Closes #3156 and gets CI green against libxml2 master.

- CDATA.new no longer accepts `nil` for content
- update Node lifecycle to ensure the new libxml2 changes don't leak
memory
- update tests to reflect improved text coalescing

See commit logs for deeper explanations on these changes.


**Have you included adequate test coverage?**

Yes.


**Does this change affect the behavior of either the C or the Java
implementations?**

Both C and Java implementations no longer accept `nil` for CDATA
content.
  • Loading branch information
flavorjones committed Mar 22, 2024
2 parents d5cecb5 + e54cd33 commit 97f2579
Show file tree
Hide file tree
Showing 11 changed files with 55 additions and 47 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

* [CRuby] libgumbo (the HTML5 parser) treats reaching max-depth as EOF. This addresses a class of issues when the parser is interrupted in this way. [#3121] @stevecheckoway
* `Node#clone`, `NodeSet#clone`, and `*::Document#clone` all properly copy the metaclass of the original as expected. Previously, `#clone` had been aliased to `#dup` for these classes (since v1.3.0 in 2009). [#316, #3117] @flavorjones
* [CRuby] Update node GC lifecycle to avoid a potential memory leak with fragments in libxml 2.13.0 caused by changes in `xmlAddChild`. [#3156] @flavorjones


### Changed

* [CRuby] `Nokogiri::XML::CData.new` no longer accepts `nil` as the content argument, making `CData` behave like other character data classes (like `Comment` and `Text`). This change was necessitated by behavioral changes in the upcoming libxml 2.13.0 release. If you wish to create an empty CDATA node, pass an empty string. [#3156] @flavorjones


## v1.16.3 / 2024-03-15
Expand Down
7 changes: 7 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,13 @@ To run a focused test, use Minitest's `TESTOPTS`:
bundle exec rake compile test TESTOPTS="-n/test_last_element_child/"
```

Or to run tests on specific files, use `TESTGLOB`:

``` sh
bundle exec rake compile test TESTGLOB="test/**/test_*node*rb"
```


To run the test suite in parallel, set the `NCPU` environment variable; and to compile in parallel, set the `MAKEFLAGS` environment variable (you may want to set these in something like your .bashrc):

``` sh
Expand Down
3 changes: 3 additions & 0 deletions ext/java/nokogiri/XmlCdata.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public class XmlCdata extends XmlText
IRubyObject rbDocument = args[0];
content = args[1];

if (content.isNil()) {
throw context.runtime.newTypeError("expected second parameter to be a String, received NilClass");
}
if (!(rbDocument instanceof XmlNode)) {
String msg = "expected first parameter to be a Nokogiri::XML::Document, received " + rbDocument.getMetaClass();
throw context.runtime.newTypeError(msg);
Expand Down
12 changes: 2 additions & 10 deletions ext/nokogiri/xml_cdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
VALUE rb_content;
VALUE rb_rest;
VALUE rb_node;
xmlChar *c_content = NULL;
int c_content_len = 0;

rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);

Check_Type(rb_content, T_STRING);
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
rb_raise(rb_eTypeError,
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
Expand All @@ -40,15 +39,8 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
c_document = noko_xml_document_unwrap(rb_document);
}

if (!NIL_P(rb_content)) {
c_content = (xmlChar *)StringValuePtr(rb_content);
c_content_len = RSTRING_LENINT(rb_content);
}

c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);

c_node = xmlNewCDataBlock(c_document, (xmlChar *)StringValueCStr(rb_content), RSTRING_LENINT(rb_content));
noko_xml_document_pin_node(c_node);

rb_node = noko_xml_node_wrap(klass, c_node);
rb_obj_call_init(rb_node, argc, argv);

Expand Down
11 changes: 3 additions & 8 deletions ext/nokogiri/xml_comment.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,20 @@ new (int argc, VALUE *argv, VALUE klass)

rb_scan_args(argc, argv, "2*", &document, &content, &rest);

Check_Type(content, T_STRING);
if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) {
document = rb_funcall(document, document_id, 0);
} else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument)
&& !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) {
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
}

xml_doc = noko_xml_document_unwrap(document);

node = xmlNewDocComment(
xml_doc,
(const xmlChar *)StringValueCStr(content)
);

node = xmlNewDocComment(xml_doc, (const xmlChar *)StringValueCStr(content));
noko_xml_document_pin_node(node);
rb_node = noko_xml_node_wrap(klass, node);
rb_obj_call_init(rb_node, argc, argv);

noko_xml_document_pin_node(node);

if (rb_block_given_p()) { rb_yield(rb_node); }

return rb_node;
Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
break;
default:
if (node->parent == NULL) {
node->next = NULL;
node->prev = NULL;
xmlAddChild((xmlNodePtr)doc, node);
}
}
Expand Down
29 changes: 14 additions & 15 deletions ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1893,22 +1893,21 @@ output_node(
}
break;

case XML_ATTRIBUTE_NODE:
{
xmlAttrPtr attr = (xmlAttrPtr)node;
output_attr_name(out, attr);
if (attr->children) {
output_string(out, "=\"");
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
output_escaped_string(out, value, true);
xmlFree(value);
output_char(out, '"');
} else {
// Output name=""
output_string(out, "=\"\"");
}
case XML_ATTRIBUTE_NODE: {
xmlAttrPtr attr = (xmlAttrPtr)node;
output_attr_name(out, attr);
if (attr->children) {
output_string(out, "=\"");
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
output_escaped_string(out, value, true);
xmlFree(value);
output_char(out, '"');
} else {
// Output name=""
output_string(out, "=\"\"");
}
break;
}
break;

case XML_TEXT_NODE:
if (node->parent
Expand Down
6 changes: 2 additions & 4 deletions ext/nokogiri/xml_text.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)

rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);

Check_Type(rb_string, T_STRING);
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
rb_raise(rb_eTypeError,
"expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
Expand All @@ -35,11 +36,8 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
c_document = noko_xml_document_unwrap(rb_document);
}

c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
c_node->doc = c_document;

c_node = xmlNewDocText(c_document, (xmlChar *)StringValueCStr(rb_string));
noko_xml_document_pin_node(c_node);

rb_node = noko_xml_node_wrap(klass, c_node) ;
rb_obj_call_init(rb_node, argc, argv);

Expand Down
5 changes: 2 additions & 3 deletions rakelib/test.rake
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,12 @@ end

def nokogiri_test_task_configuration(t)
t.libs << "test"
# t.verbose = true # This is noisier than we need. Commenting out 2024-03-07.
# t.options = "-v" if ENV["CI"] # I haven't needed this in a long time. Commenting out 2023-12-10.
t.verbose = true if ENV["TESTGLOB"]
end

def nokogiri_test_case_configuration(t)
nokogiri_test_task_configuration(t)
t.test_files = FileList["test/**/test_*.rb"]
t.test_files = FileList[ENV["TESTGLOB"] || "test/**/test_*.rb"]
end

def nokogiri_test_bench_configuration(t)
Expand Down
9 changes: 4 additions & 5 deletions test/xml/test_cdata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,10 @@
assert_same(doc, node.document)
end

it "has nil content when passed nil" do
node = Nokogiri::XML::CDATA.new(Nokogiri::XML::Document.new, nil)

assert_instance_of(Nokogiri::XML::CDATA, node)
assert_nil(node.content)
it "when passed nil raises TypeError" do
assert_raises(TypeError) do
Nokogiri::XML::CDATA.new(Nokogiri::XML::Document.new, nil)
end
end

it "does not accept anything but a string" do
Expand Down
12 changes: 10 additions & 2 deletions test/xml/test_node_reparenting.rb
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,11 @@ def coerce(data)
assert_equal "after", after.content
refute_nil after.parent, "unrelated node should not be affected"

assert_equal "before", before.content
if Nokogiri.uses_libxml?(">= 2.13.0")
assert_equal "beforex", before.content # coalescing fixed in gnome/libxml2@4ccd3eb8
else
assert_equal "before", before.content
end
refute_nil before.parent, "no need to reparent"
end
end
Expand Down Expand Up @@ -662,7 +666,11 @@ def coerce(data)
assert_equal "before", before.content
refute_nil before.parent, "unrelated node should not be affected"

assert_equal "after", after.content
if Nokogiri.uses_libxml?(">= 2.13.0")
assert_equal "xafter", after.content # coalescing fixed in gnome/libxml2@4ccd3eb8
else
assert_equal "after", after.content
end
refute_nil after.parent
end
end
Expand Down

0 comments on commit 97f2579

Please sign in to comment.