From b370fbd290343e93f20e6bf12522c5ba3f2f5caf Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Thu, 16 Jun 2022 13:15:51 -0700 Subject: [PATCH 1/6] Always set a mark function on the node wrapper Unconditionally set a mark function on the node wrapper. We will later refactor this to use TypedData_Wrap_Struct and we don't want to conditionally set mark functions on that Co-Authored-By: Mike Dalessio --- ext/nokogiri/xml_node.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 14f1a871ff..8399cec224 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -19,10 +19,13 @@ _xml_node_dealloc(xmlNodePtr x) # define _xml_node_dealloc 0 #endif - static void _xml_node_mark(xmlNodePtr node) { + if (!DOC_RUBY_OBJECT_TEST(node->doc)) { + return; + } + xmlDocPtr doc = node->doc; if (doc->type == XML_DOCUMENT_NODE || doc->type == XML_HTML_DOCUMENT_NODE) { if (DOC_RUBY_OBJECT_TEST(doc)) { @@ -1981,7 +1984,6 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) VALUE rb_document, rb_node_cache, rb_node; nokogiriTuplePtr node_has_a_document; xmlDocPtr c_doc; - void (*f_mark)(xmlNodePtr) = NULL ; assert(c_node); @@ -1989,11 +1991,9 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) return DOC_RUBY_OBJECT(c_node->doc); } - /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */ - /* see https://github.com/sparklemotion/nokogiri/issues/95 */ - /* and https://github.com/sparklemotion/nokogiri/issues/439 */ c_doc = c_node->doc; - if (c_doc->type == XML_DOCUMENT_FRAG_NODE) { c_doc = c_doc->doc; } + + // Nodes yielded from XML::Reader don't have a fully-realized Document node_has_a_document = DOC_RUBY_OBJECT_TEST(c_doc); if (c_node->_private && node_has_a_document) { @@ -2043,9 +2043,7 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) } } - f_mark = node_has_a_document ? _xml_node_mark : NULL ; - - rb_node = Data_Wrap_Struct(rb_class, f_mark, _xml_node_dealloc, c_node) ; + rb_node = Data_Wrap_Struct(rb_class, _xml_node_mark, _xml_node_dealloc, c_node) ; c_node->_private = (void *)rb_node; if (node_has_a_document) { From 9411ff79199eb9e9b3d11809549876c5d3d0fe9f Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 17 Jun 2022 12:31:53 -0700 Subject: [PATCH 2/6] Add a "node unwrap" macro and use it This commit introduces a `Noko_Node_Get_Struct` macro for unwrapping the underlying data pointer from Nokogiri objects. Then we change all places that unwrap nodes to use the new macro. We also converted xmlNode to use `TypedData_Wrap_Struct` so we can provide a compaction function. Note that we use DATA_PTR instead of "typed" unwrappers everywhere because the large number of C structs that inherit from xmlNode would make the unwrapping code unwieldy and type-check-heavy. Co-Authored-By: Matt Valentine-House Co-Authored-By: Mike Dalessio --- ext/nokogiri/gumbo.c | 2 +- ext/nokogiri/nokogiri.h | 2 + ext/nokogiri/xml_attr.c | 4 +- ext/nokogiri/xml_attribute_decl.c | 6 +- ext/nokogiri/xml_cdata.c | 2 +- ext/nokogiri/xml_document.c | 2 +- ext/nokogiri/xml_dtd.c | 16 ++--- ext/nokogiri/xml_element_decl.c | 6 +- ext/nokogiri/xml_entity_decl.c | 10 +-- ext/nokogiri/xml_node.c | 116 ++++++++++++++++-------------- ext/nokogiri/xml_node_set.c | 8 +-- ext/nokogiri/xml_schema.c | 6 +- ext/nokogiri/xml_text.c | 2 +- ext/nokogiri/xml_xpath_context.c | 2 +- 14 files changed, 96 insertions(+), 88 deletions(-) diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c index 41a8667ca3..56475749fa 100644 --- a/ext/nokogiri/gumbo.c +++ b/ext/nokogiri/gumbo.c @@ -404,7 +404,7 @@ static xmlNodePtr extract_xml_node(VALUE node) { xmlNodePtr xml_node; - Data_Get_Struct(node, xmlNode, xml_node); + Noko_Node_Get_Struct(node, xmlNode, xml_node); return xml_node; } diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index e473ee1546..42f4fd4a4d 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -177,6 +177,8 @@ int noko_io_read(void *ctx, char *buffer, int len); int noko_io_write(void *ctx, char *buffer, int len); int noko_io_close(void *ctx); +#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) + VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ; VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ; VALUE noko_xml_node_attrs(xmlNodePtr node) ; diff --git a/ext/nokogiri/xml_attr.c b/ext/nokogiri/xml_attr.c index 63e5498b53..9fbda17320 100644 --- a/ext/nokogiri/xml_attr.c +++ b/ext/nokogiri/xml_attr.c @@ -16,7 +16,7 @@ set_value(VALUE self, VALUE content) xmlChar *value; xmlNode *cur; - Data_Get_Struct(self, xmlAttr, attr); + Noko_Node_Get_Struct(self, xmlAttr, attr); if (attr->children) { xmlFreeNodeList(attr->children); @@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document"); } - Data_Get_Struct(document, xmlDoc, xml_doc); + Noko_Node_Get_Struct(document, xmlDoc, xml_doc); node = xmlNewDocProp( xml_doc, diff --git a/ext/nokogiri/xml_attribute_decl.c b/ext/nokogiri/xml_attribute_decl.c index 8603ff9553..951e393ce1 100644 --- a/ext/nokogiri/xml_attribute_decl.c +++ b/ext/nokogiri/xml_attribute_decl.c @@ -12,7 +12,7 @@ static VALUE attribute_type(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); return INT2NUM((long)node->atype); } @@ -26,7 +26,7 @@ static VALUE default_value(VALUE self) { xmlAttributePtr node; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); if (node->defaultValue) { return NOKOGIRI_STR_NEW2(node->defaultValue); } return Qnil; @@ -45,7 +45,7 @@ enumeration(VALUE self) xmlEnumerationPtr enm; VALUE list; - Data_Get_Struct(self, xmlAttribute, node); + Noko_Node_Get_Struct(self, xmlAttribute, node); list = rb_ary_new(); enm = node->tree; diff --git a/ext/nokogiri/xml_cdata.c b/ext/nokogiri/xml_cdata.c index 954f3ab3e3..6ca23a9eef 100644 --- a/ext/nokogiri/xml_cdata.c +++ b/ext/nokogiri/xml_cdata.c @@ -25,7 +25,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &doc, &content, &rest); - Data_Get_Struct(doc, xmlDoc, xml_doc); + Noko_Node_Get_Struct(doc, xmlDoc, xml_doc); if (!NIL_P(content)) { content_str = (xmlChar *)StringValuePtr(content); diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 134931de6b..187ba1d86f 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -161,7 +161,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root) rb_obj_class(rb_new_root)); } - Data_Get_Struct(rb_new_root, xmlNode, c_new_root); + Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root); /* If the new root's document is not the same as the current document, * then we need to dup the node in to this document. */ diff --git a/ext/nokogiri/xml_dtd.c b/ext/nokogiri/xml_dtd.c index 15908d3399..05be753b51 100644 --- a/ext/nokogiri/xml_dtd.c +++ b/ext/nokogiri/xml_dtd.c @@ -44,7 +44,7 @@ entities(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->entities) { return Qnil; } @@ -67,7 +67,7 @@ notations(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->notations) { return Qnil; } @@ -90,7 +90,7 @@ attributes(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); hash = rb_hash_new(); @@ -113,7 +113,7 @@ elements(VALUE self) xmlDtdPtr dtd; VALUE hash; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->elements) { return Qnil; } @@ -138,8 +138,8 @@ validate(VALUE self, VALUE document) xmlValidCtxtPtr ctxt; VALUE error_list; - Data_Get_Struct(self, xmlDtd, dtd); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(document, xmlDoc, doc); error_list = rb_ary_new(); ctxt = xmlNewValidCtxt(); @@ -165,7 +165,7 @@ static VALUE system_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->SystemID) { return Qnil; } @@ -182,7 +182,7 @@ static VALUE external_id(VALUE self) { xmlDtdPtr dtd; - Data_Get_Struct(self, xmlDtd, dtd); + Noko_Node_Get_Struct(self, xmlDtd, dtd); if (!dtd->ExternalID) { return Qnil; } diff --git a/ext/nokogiri/xml_element_decl.c b/ext/nokogiri/xml_element_decl.c index 178c69b80d..392a300372 100644 --- a/ext/nokogiri/xml_element_decl.c +++ b/ext/nokogiri/xml_element_decl.c @@ -14,7 +14,7 @@ static VALUE element_type(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); return INT2NUM((long)node->etype); } @@ -28,7 +28,7 @@ static VALUE content(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); if (!node->content) { return Qnil; } @@ -48,7 +48,7 @@ static VALUE prefix(VALUE self) { xmlElementPtr node; - Data_Get_Struct(self, xmlElement, node); + Noko_Node_Get_Struct(self, xmlElement, node); if (!node->prefix) { return Qnil; } diff --git a/ext/nokogiri/xml_entity_decl.c b/ext/nokogiri/xml_entity_decl.c index 50893d2da1..846509abe8 100644 --- a/ext/nokogiri/xml_entity_decl.c +++ b/ext/nokogiri/xml_entity_decl.c @@ -12,7 +12,7 @@ static VALUE original_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->orig) { return Qnil; } @@ -29,7 +29,7 @@ static VALUE get_content(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->content) { return Qnil; } @@ -46,7 +46,7 @@ static VALUE entity_type(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); return INT2NUM((int)node->etype); } @@ -61,7 +61,7 @@ static VALUE external_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->ExternalID) { return Qnil; } @@ -78,7 +78,7 @@ static VALUE system_id(VALUE self) { xmlEntityPtr node; - Data_Get_Struct(self, xmlEntity, node); + Noko_Node_Get_Struct(self, xmlEntity, node); if (!node->SystemID) { return Qnil; } diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 8399cec224..7e24f273df 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -7,7 +7,6 @@ static ID id_decorate, id_decorate_bang; typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr); - #ifdef DEBUG static void _xml_node_dealloc(xmlNodePtr x) @@ -36,6 +35,14 @@ _xml_node_mark(xmlNodePtr node) } } +static const rb_data_type_t nokogiri_node_type = { + "Nokogiri/XMLNode", + {_xml_node_mark, _xml_node_dealloc, 0,}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; static void relink_namespace(xmlNodePtr reparented) @@ -201,8 +208,8 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node"); } - Data_Get_Struct(reparentee_obj, xmlNode, reparentee); - Data_Get_Struct(pivot_obj, xmlNode, pivot); + Noko_Node_Get_Struct(reparentee_obj, xmlNode, reparentee); + Noko_Node_Get_Struct(pivot_obj, xmlNode, pivot); /* * Check if nodes given are appropriate to have a parent-child @@ -442,7 +449,7 @@ rb_xml_node_add_namespace_definition(VALUE rb_node, VALUE rb_prefix, VALUE rb_hr xmlNsPtr c_namespace; const xmlChar *c_prefix = (const xmlChar *)(NIL_P(rb_prefix) ? NULL : StringValueCStr(rb_prefix)); - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); element = c_node ; c_namespace = xmlSearchNs(c_node->doc, c_node, c_prefix); @@ -509,7 +516,7 @@ rb_xml_node_attribute(VALUE self, VALUE name) { xmlNodePtr node; xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); prop = xmlHasProp(node, (xmlChar *)StringValueCStr(name)); if (! prop) { return Qnil; } @@ -560,7 +567,7 @@ rb_xml_node_attribute_nodes(VALUE rb_node) { xmlNodePtr c_node; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); return noko_xml_node_attrs(c_node); } @@ -612,7 +619,7 @@ rb_xml_node_attribute_with_ns(VALUE self, VALUE name, VALUE namespace) { xmlNodePtr node; xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); prop = xmlHasNsProp(node, (xmlChar *)StringValueCStr(name), NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace)); @@ -639,7 +646,7 @@ static VALUE rb_xml_node_blank_eh(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ; } @@ -661,7 +668,7 @@ static VALUE rb_xml_node_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = node->children; if (!child) { return Qnil; } @@ -686,7 +693,7 @@ rb_xml_node_children(VALUE self) VALUE document; VALUE node_set; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = node->children; set = xmlXPathNodeSetCreate(child); @@ -745,7 +752,7 @@ rb_xml_node_content(VALUE self) xmlNodePtr node; xmlChar *content; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); content = xmlNodeGetContent(node); if (content) { @@ -768,7 +775,7 @@ static VALUE rb_xml_node_document(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return DOC_RUBY_OBJECT(node->doc); } @@ -783,7 +790,7 @@ static VALUE rb_xml_node_pointer_id(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return INT2NUM((long)(node)); } @@ -800,7 +807,7 @@ encode_special_chars(VALUE self, VALUE string) xmlChar *encoded; VALUE encoded_str; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); encoded = xmlEncodeSpecialChars( node->doc, (const xmlChar *)StringValueCStr(string) @@ -831,7 +838,7 @@ create_internal_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; @@ -864,7 +871,7 @@ create_external_subset(VALUE self, VALUE name, VALUE external_id, VALUE system_i xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = node->doc; @@ -897,7 +904,7 @@ external_subset(VALUE self) xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (!node->doc) { return Qnil; } @@ -922,7 +929,7 @@ internal_subset(VALUE self) xmlDocPtr doc; xmlDtdPtr dtd; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (!node->doc) { return Qnil; } @@ -958,7 +965,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self) xmlDocPtr new_parent_doc; xmlNodePtr node, dup; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc); @@ -991,7 +998,7 @@ static VALUE unlink_node(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlUnlinkNode(node); noko_xml_document_pin_node(node); return self; @@ -1008,7 +1015,7 @@ static VALUE next_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->next; if (!sibling) { return Qnil; } @@ -1026,7 +1033,7 @@ static VALUE previous_sibling(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = node->prev; if (!sibling) { return Qnil; } @@ -1044,7 +1051,7 @@ static VALUE next_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); sibling = xmlNextElementSibling(node); if (!sibling) { return Qnil; } @@ -1062,7 +1069,7 @@ static VALUE previous_element(VALUE self) { xmlNodePtr node, sibling; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7. @@ -1084,7 +1091,7 @@ replace(VALUE self, VALUE new_node) VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper); xmlNodePtr pivot; - Data_Get_Struct(self, xmlNode, pivot); + Noko_Node_Get_Struct(self, xmlNode, pivot); noko_xml_document_pin_node(pivot); return reparent; @@ -1119,7 +1126,7 @@ rb_xml_node_element_children(VALUE self) VALUE document; VALUE node_set; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); set = xmlXPathNodeSetCreate(child); @@ -1158,7 +1165,7 @@ static VALUE rb_xml_node_first_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlFirstElementChild(node); if (!child) { return Qnil; } @@ -1185,7 +1192,7 @@ static VALUE rb_xml_node_last_element_child(VALUE self) { xmlNodePtr node, child; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = xmlLastElementChild(node); if (!child) { return Qnil; } @@ -1203,7 +1210,7 @@ static VALUE key_eh(VALUE self, VALUE attribute) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (xmlHasProp(node, (xmlChar *)StringValueCStr(attribute))) { return Qtrue; } @@ -1220,7 +1227,7 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (xmlHasNsProp(node, (xmlChar *)StringValueCStr(attribute), NIL_P(namespace) ? NULL : (xmlChar *)StringValueCStr(namespace))) { return Qtrue; @@ -1239,7 +1246,7 @@ set(VALUE self, VALUE property, VALUE value) { xmlNodePtr node, cur; xmlAttrPtr prop; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); /* If a matching attribute node already exists, then xmlSetProp will destroy * the existing node's children. However, if Nokogiri has a node object @@ -1284,7 +1291,7 @@ get(VALUE self, VALUE rattribute) if (NIL_P(rattribute)) { return Qnil; } - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); attribute = xmlCharStrdup(StringValueCStr(rattribute)); colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':')); @@ -1326,7 +1333,7 @@ set_namespace(VALUE self, VALUE namespace) xmlNodePtr node; xmlNsPtr ns = NULL; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (!NIL_P(namespace)) { Data_Get_Struct(namespace, xmlNs, ns); @@ -1363,7 +1370,7 @@ static VALUE rb_xml_node_namespace(VALUE rb_node) { xmlNodePtr c_node ; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); if (c_node->ns) { return noko_xml_namespace_wrap(c_node->ns, c_node->doc); @@ -1408,7 +1415,7 @@ namespace_definitions(VALUE rb_node) xmlNsPtr c_namespace; VALUE definitions = rb_ary_new(); - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); c_namespace = c_node->nsDef; if (!c_namespace) { @@ -1459,7 +1466,7 @@ rb_xml_node_namespace_scopes(VALUE rb_node) VALUE scopes = rb_ary_new(); int j; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); namespaces = xmlGetNsList(c_node->doc, c_node); if (!namespaces) { @@ -1484,7 +1491,7 @@ static VALUE node_type(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); return INT2NUM((long)node->type); } @@ -1498,7 +1505,7 @@ static VALUE set_native_content(VALUE self, VALUE content) { xmlNodePtr node, child, next ; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); child = node->children; while (NULL != child) { @@ -1524,7 +1531,7 @@ set_lang(VALUE self_rb, VALUE lang_rb) xmlNodePtr self ; xmlChar *lang ; - Data_Get_Struct(self_rb, xmlNode, self); + Noko_Node_Get_Struct(self_rb, xmlNode, self); lang = (xmlChar *)StringValueCStr(lang_rb); xmlNodeSetLang(self, lang); @@ -1546,7 +1553,7 @@ get_lang(VALUE self_rb) xmlChar *lang ; VALUE lang_rb ; - Data_Get_Struct(self_rb, xmlNode, self); + Noko_Node_Get_Struct(self_rb, xmlNode, self); lang = xmlNodeGetLang(self); if (lang) { @@ -1575,7 +1582,7 @@ static VALUE get_parent(VALUE self) { xmlNodePtr node, parent; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); parent = node->parent; if (!parent) { return Qnil; } @@ -1593,7 +1600,7 @@ static VALUE set_name(VALUE self, VALUE new_name) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlNodeSetName(node, (xmlChar *)StringValueCStr(new_name)); return new_name; } @@ -1608,7 +1615,7 @@ static VALUE get_name(VALUE self) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); if (node->name) { return NOKOGIRI_STR_NEW2(node->name); } @@ -1628,7 +1635,7 @@ rb_xml_node_path(VALUE rb_node) xmlChar *c_path ; VALUE rval; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); c_path = xmlGetNodePath(c_node); if (c_path == NULL) { @@ -1677,7 +1684,7 @@ native_write_to( const char *before_indent; xmlSaveCtxtPtr savectx; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlIndentTreeOutput = 1; @@ -1731,7 +1738,7 @@ static VALUE rb_xml_node_line(VALUE rb_node) { xmlNodePtr c_node; - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); return INT2NUM(xmlGetLineNo(c_node)); } @@ -1748,7 +1755,7 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number) xmlNodePtr c_node; int line_number = NUM2INT(rb_line_number); - Data_Get_Struct(rb_node, xmlNode, c_node); + Noko_Node_Get_Struct(rb_node, xmlNode, c_node); // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes. // search for "psvi" in SAX2.c and tree.c to learn more. @@ -1784,7 +1791,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass) // TODO: deprecate allowing Node rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); } - Data_Get_Struct(rb_document_node, xmlNode, c_document_node); + Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); c_node = xmlNewNode(NULL, (xmlChar *)StringValueCStr(rb_name)); c_node->doc = c_document_node->doc; @@ -1814,7 +1821,7 @@ dump_html(VALUE self) xmlNodePtr node ; VALUE html; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); buf = xmlBufferCreate() ; htmlNodeDump(buf, node->doc, node); @@ -1833,8 +1840,8 @@ static VALUE compare(VALUE self, VALUE _other) { xmlNodePtr node, other; - Data_Get_Struct(self, xmlNode, node); - Data_Get_Struct(_other, xmlNode, other); + Noko_Node_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(_other, xmlNode, other); return INT2NUM((long)xmlXPathCmpNodes(other, node)); } @@ -1854,7 +1861,7 @@ process_xincludes(VALUE self, VALUE options) xmlNodePtr node; VALUE error_list = rb_ary_new(); - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options)); @@ -1885,7 +1892,7 @@ in_context(VALUE self, VALUE _str, VALUE _options) VALUE doc, err; int doc_is_empty; - Data_Get_Struct(self, xmlNode, node); + Noko_Node_Get_Struct(self, xmlNode, node); doc = DOC_RUBY_OBJECT(node->doc); err = rb_iv_get(doc, "@errors"); @@ -1977,7 +1984,6 @@ in_context(VALUE self, VALUE _str, VALUE _options) return noko_xml_node_set_wrap(set, doc); } - VALUE noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) { @@ -2043,7 +2049,7 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node) } } - rb_node = Data_Wrap_Struct(rb_class, _xml_node_mark, _xml_node_dealloc, c_node) ; + rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ; c_node->_private = (void *)rb_node; if (node_has_a_document) { diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index 5670371a93..66f8780d5a 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -156,7 +156,7 @@ push(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); xmlXPathNodeSetAdd(node_set, node); @@ -179,7 +179,7 @@ delete (VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); if (xmlXPathNodeSetContains(node_set, node)) { xpath_node_set_del(node_set, node); @@ -228,7 +228,7 @@ include_eh(VALUE self, VALUE rb_node) Check_Node_Set_Node_Type(rb_node); Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_node, xmlNode, node); + Noko_Node_Get_Struct(rb_node, xmlNode, node); return (xmlXPathNodeSetContains(node_set, node) ? Qtrue : Qfalse); } @@ -430,7 +430,7 @@ unlink_nodeset(VALUE self) xmlNodePtr node_ptr; node = noko_xml_node_wrap(Qnil, node_set->nodeTab[j]); rb_funcall(node, rb_intern("unlink"), 0); /* modifies the C struct out from under the object */ - Data_Get_Struct(node, xmlNode, node_ptr); + Noko_Node_Get_Struct(node, xmlNode, node_ptr); node_set->nodeTab[j] = node_ptr ; } } diff --git a/ext/nokogiri/xml_schema.c b/ext/nokogiri/xml_schema.c index a8175e6da3..2707cae235 100644 --- a/ext/nokogiri/xml_schema.c +++ b/ext/nokogiri/xml_schema.c @@ -25,7 +25,7 @@ validate_document(VALUE self, VALUE document) VALUE errors; Data_Get_Struct(self, xmlSchema, schema); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); errors = rb_ary_new(); @@ -179,7 +179,7 @@ has_blank_nodes_p(VALUE cache) for (i = 0; i < RARRAY_LEN(cache); i++) { xmlNodePtr node; VALUE element = rb_ary_entry(cache, i); - Data_Get_Struct(element, xmlNode, node); + Noko_Node_Get_Struct(element, xmlNode, node); if (xmlIsBlankNode(node)) { return 1; } @@ -210,7 +210,7 @@ from_document(int argc, VALUE *argv, VALUE klass) scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); doc = doc->doc; /* In case someone passes us a node. ugh. */ if (scanned_args == 1) { diff --git a/ext/nokogiri/xml_text.c b/ext/nokogiri/xml_text.c index eb78e2373c..6ba75e450b 100644 --- a/ext/nokogiri/xml_text.c +++ b/ext/nokogiri/xml_text.c @@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass) rb_scan_args(argc, argv, "2*", &string, &document, &rest); - Data_Get_Struct(document, xmlDoc, doc); + Noko_Node_Get_Struct(document, xmlDoc, doc); node = xmlNewText((xmlChar *)StringValueCStr(string)); node->doc = doc->doc; diff --git a/ext/nokogiri/xml_xpath_context.c b/ext/nokogiri/xml_xpath_context.c index cc923e9183..c3c4154d0d 100644 --- a/ext/nokogiri/xml_xpath_context.c +++ b/ext/nokogiri/xml_xpath_context.c @@ -372,7 +372,7 @@ new (VALUE klass, VALUE nodeobj) xmlXPathContextPtr ctx; VALUE self; - Data_Get_Struct(nodeobj, xmlNode, node); + Noko_Node_Get_Struct(nodeobj, xmlNode, node); xmlXPathInit(); From 513207f557a6b4e5630811b86966bcb29eed8697 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 17 Jun 2022 12:43:35 -0700 Subject: [PATCH 3/6] Add compaction callback to nodes This fixes the case where nodes moves and we need to update the private references. Co-Authored-By: Matt Valentine-House Co-Authored-By: Mike Dalessio --- ext/nokogiri/xml_node.c | 12 +++++++++++- test/test_compaction.rb | 19 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 test/test_compaction.rb diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 7e24f273df..eb2f11e3bc 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -35,9 +35,19 @@ _xml_node_mark(xmlNodePtr node) } } +static void +_xml_node_update_references(xmlNodePtr node) +{ + if (node->_private) { + node->_private = (void *)rb_gc_location((VALUE)node->_private); + } +} + +typedef void (*gc_callback_t)(void *); + static const rb_data_type_t nokogiri_node_type = { "Nokogiri/XMLNode", - {_xml_node_mark, _xml_node_dealloc, 0,}, + {(gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0, (gc_callback_t)_xml_node_update_references}, 0, 0, #ifdef RUBY_TYPED_FREE_IMMEDIATELY RUBY_TYPED_FREE_IMMEDIATELY, diff --git a/test/test_compaction.rb b/test/test_compaction.rb new file mode 100644 index 0000000000..591b242686 --- /dev/null +++ b/test/test_compaction.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +require "helper" + +describe "compaction" do + it "https://github.com/sparklemotion/nokogiri/pull/2579" do + big_doc = "" + ("a".."zz").map { |x| "<#{x}>#{x}" }.join + "" + doc = Nokogiri.XML(big_doc) + + # ensure a bunch of node objects have been wrapped + doc.root.children.each(&:inspect) + + # compact the heap and try to get the node wrappers to move + GC.verify_compaction_references(double_heap: true, toward: :empty) + + # access the node wrappers and make sure they didn't move + doc.root.children.each(&:inspect) + end +end From b9f91756a6f494426b8b8ad873a4f9fa3df8778b Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 17 Jun 2022 14:13:36 -0700 Subject: [PATCH 4/6] Only check compaction on platforms that support it Co-Authored-By: Mike Dalessio --- test/test_compaction.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/test_compaction.rb b/test/test_compaction.rb index 591b242686..5b0720f670 100644 --- a/test/test_compaction.rb +++ b/test/test_compaction.rb @@ -4,6 +4,8 @@ describe "compaction" do it "https://github.com/sparklemotion/nokogiri/pull/2579" do + skip unless GC.respond_to?(:verify_compaction_references) + big_doc = "" + ("a".."zz").map { |x| "<#{x}>#{x}" }.join + "" doc = Nokogiri.XML(big_doc) From 535bb50fc49c407f04a9380cc4987106f9dca424 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Fri, 17 Jun 2022 14:26:14 -0700 Subject: [PATCH 5/6] Only add compaction callback on Rubies that support it We don't have `rb_gc_location` everywhere, so check for it in the extconf and then conditionally set the compaction callback Co-Authored-By: Mike Dalessio --- ext/nokogiri/extconf.rb | 1 + ext/nokogiri/xml_node.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 03c07ff26f..c2aeed2d21 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -972,6 +972,7 @@ def compile have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21 have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 +have_func("rb_gc_location") # introduced in Ruby 2.7 other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",") append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""]) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index eb2f11e3bc..2507f577f1 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -35,6 +35,7 @@ _xml_node_mark(xmlNodePtr node) } } +#ifdef HAVE_RB_GC_LOCATION static void _xml_node_update_references(xmlNodePtr node) { @@ -42,15 +43,21 @@ _xml_node_update_references(xmlNodePtr node) node->_private = (void *)rb_gc_location((VALUE)node->_private); } } +#endif typedef void (*gc_callback_t)(void *); static const rb_data_type_t nokogiri_node_type = { - "Nokogiri/XMLNode", - {(gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0, (gc_callback_t)_xml_node_update_references}, - 0, 0, + "Nokogiri/XMLNode", + { + (gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0, +#ifdef HAVE_RB_GC_LOCATION + (gc_callback_t)_xml_node_update_references +#endif + }, + 0, 0, #ifdef RUBY_TYPED_FREE_IMMEDIATELY - RUBY_TYPED_FREE_IMMEDIATELY, + RUBY_TYPED_FREE_IMMEDIATELY, #endif }; From a0360803740e5c85767ace75563bb3a3b33d1963 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 11 Jul 2022 08:20:02 -0400 Subject: [PATCH 6/6] test: add memcheck suppressions for warnings triggered by the compaction test. - for memory allocated in ary_heap_realloc - for memory added to the mark stack in stack_chunk_alloc --- suppressions/nokogiri_ruby.supp | 48 +++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/suppressions/nokogiri_ruby.supp b/suppressions/nokogiri_ruby.supp index 01c8f3231b..decd2ffac0 100644 --- a/suppressions/nokogiri_ruby.supp +++ b/suppressions/nokogiri_ruby.supp @@ -173,3 +173,51 @@ fun:xmlXPathEval fun:evaluate } +{ + triggered by compaction stress test - https://github.com/sparklemotion/nokogiri/pull/2579 + # + # 39,848 bytes in 1 blocks are definitely lost in loss record 38,618 of 38,681 + # realloc (vg_replace_malloc.c:834) + # objspace_xrealloc.constprop.0 (gc.c:11516) + # ary_heap_realloc (array.c:381) + # ary_resize_capa (array.c:461) + # ary_double_capa (array.c:507) + # ary_ensure_room_for_push (array.c:654) + # rb_ary_push (array.c:1311) + # *noko_xml_node_wrap (xml_node.c:2075) + # *noko_xml_node_attrs (xml_node.c:2094) + # + Memcheck:Leak + fun:realloc + fun:objspace_xrealloc* + fun:ary_heap_realloc + ... + fun:ary_ensure_room_for_push + fun:rb_ary_push + fun:noko_xml_node_wrap +} +{ + triggered by compaction stress test - https://github.com/sparklemotion/nokogiri/pull/2579 + # + # 52,104 (4,008 direct, 48,096 indirect) bytes in 1 blocks are definitely lost in loss record 38,831 of 38,877 + # malloc (vg_replace_malloc.c:307) + # stack_chunk_alloc (gc.c:5917) + # push_mark_stack_chunk (gc.c:5980) + # push_mark_stack (gc.c:6038) + # gc_grey (gc.c:6665) + # *mark (xml_node_set.c:50) + # gc_mark_stacked_objects (gc.c:7071) + # gc_mark_stacked_objects_all (gc.c:7111) + # gc_marks_rest (gc.c:8095) + # gc_marks (gc.c:8151) + # gc_start (gc.c:8967) + # + Memcheck:Leak + fun:malloc + fun:stack_chunk_alloc + fun:push_mark_stack_chunk + fun:push_mark_stack + fun:gc_grey + ... + fun:gc_start +}