Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate Nokogiri::XML::Document to the TypedData API #2807

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion ext/nokogiri/html4_document.c
Expand Up @@ -145,7 +145,7 @@ static VALUE
rb_html_document_type(VALUE self)
{
htmlDocPtr doc;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);
return INT2NUM(doc->type);
}

Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/nokogiri.h
Expand Up @@ -166,6 +166,8 @@ typedef struct _nokogiriXsltStylesheetTuple {
VALUE func_instances;
} nokogiriXsltStylesheetTuple;

extern const rb_data_type_t noko_xml_document_data_type;

void noko_xml_document_pin_node(xmlNodePtr);
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);

Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_comment.c
Expand Up @@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
}

Data_Get_Struct(document, xmlDoc, xml_doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);

node = xmlNewDocComment(
xml_doc,
Expand Down
67 changes: 54 additions & 13 deletions ext/nokogiri/xml_document.c
Expand Up @@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
}

static void
mark(xmlDocPtr doc)
mark(void *data)
{
xmlDocPtr doc = (xmlDocPtr)data;
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
if (tuple) {
rb_gc_mark(tuple->doc);
Expand All @@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
}

static void
dealloc(xmlDocPtr doc)
dealloc(void *data)
{
xmlDocPtr doc = (xmlDocPtr)data;
st_table *node_hash;

node_hash = DOC_UNLINKED_NODE_HASH(doc);
Expand All @@ -84,6 +86,45 @@ dealloc(xmlDocPtr doc)
xmlFreeDoc(doc);
}

static size_t
memsize_node(const xmlNodePtr node)
{
/* note we don't count namespace definitions, just going for a good-enough number here */
xmlNodePtr child;
size_t memsize = 0;

memsize += xmlStrlen(node->name);
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
memsize += sizeof(xmlAttr) + memsize_node(child);
}
if (node->type == XML_TEXT_NODE) {
memsize += xmlStrlen(node->content);
}
for (child = node->children; child; child = child->next) {
memsize += sizeof(xmlNode) + memsize_node(child);
}
return memsize;
}

static size_t
memsize(const void *data)
{
xmlDocPtr doc = (const xmlDocPtr)data;
size_t memsize = sizeof(xmlDoc);
/* This may not account for all memory use */
memsize += memsize_node((xmlNodePtr)doc);
return memsize;
}

const rb_data_type_t noko_xml_document_data_type = {
.wrap_struct_name = "Nokogiri::XML::Document",
.function = {
.dmark = mark,
.dfree = dealloc,
.dsize = memsize,
}
};

static void
recursively_remove_namespaces_from_node(xmlNodePtr node)
{
Expand Down Expand Up @@ -127,7 +168,7 @@ static VALUE
url(VALUE self)
{
xmlDocPtr doc;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }

Expand All @@ -146,7 +187,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
xmlDocPtr c_document;
xmlNodePtr c_new_root = NULL, c_current_root;

Data_Get_Struct(self, xmlDoc, c_document);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);

c_current_root = xmlDocGetRootElement(c_document);
if (c_current_root) {
Expand Down Expand Up @@ -190,7 +231,7 @@ rb_xml_document_root(VALUE self)
xmlDocPtr c_document;
xmlNodePtr c_root;

Data_Get_Struct(self, xmlDoc, c_document);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_document);

c_root = xmlDocGetRootElement(c_document);
if (!c_root) {
Expand All @@ -210,7 +251,7 @@ static VALUE
set_encoding(VALUE self, VALUE encoding)
{
xmlDocPtr doc;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

if (doc->encoding) {
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
Expand All @@ -231,7 +272,7 @@ static VALUE
encoding(VALUE self)
{
xmlDocPtr doc;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

if (!doc->encoding) { return Qnil; }
return NOKOGIRI_STR_NEW2(doc->encoding);
Expand All @@ -247,7 +288,7 @@ static VALUE
version(VALUE self)
{
xmlDocPtr doc;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

if (!doc->version) { return Qnil; }
return NOKOGIRI_STR_NEW2(doc->version);
Expand Down Expand Up @@ -369,7 +410,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
level = INT2NUM((long)1);
}

Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

dup = xmlCopyDoc(doc, (int)NUM2INT(level));

Expand Down Expand Up @@ -443,7 +484,7 @@ static VALUE
remove_namespaces_bang(VALUE self)
{
xmlDocPtr doc ;
Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

recursively_remove_namespaces_from_node((xmlNodePtr)doc);
return self;
Expand Down Expand Up @@ -471,7 +512,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
xmlEntityPtr ptr;
xmlDocPtr doc ;

Data_Get_Struct(self, xmlDoc, doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, doc);

rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
&content);
Expand Down Expand Up @@ -559,7 +600,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
}
}

Data_Get_Struct(self, xmlDoc, c_doc);
TypedData_Get_Struct(self, xmlDoc, &noko_xml_document_data_type, c_doc);

rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
Expand Down Expand Up @@ -607,7 +648,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
klass = cNokogiriXmlDocument;
}

rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);

tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
tuple->doc = rb_document;
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_document_fragment.c
Expand Up @@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)

rb_scan_args(argc, argv, "1*", &document, &rest);

Data_Get_Struct(document, xmlDoc, xml_doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);

node = xmlNewDocFragment(xml_doc->doc);

Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_entity_reference.c
Expand Up @@ -20,7 +20,7 @@ new (int argc, VALUE *argv, VALUE klass)

rb_scan_args(argc, argv, "2*", &document, &name, &rest);

Data_Get_Struct(document, xmlDoc, xml_doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);

node = xmlNewReference(
xml_doc,
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_node.c
Expand Up @@ -984,7 +984,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
if (n_args < 2) {
new_parent_doc = node->doc;
} else {
Data_Get_Struct(r_new_parent_doc, xmlDoc, new_parent_doc);
TypedData_Get_Struct(r_new_parent_doc, xmlDoc, &noko_xml_document_data_type, new_parent_doc);
}

dup = xmlDocCopyNode(node, new_parent_doc, level);
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_processing_instruction.c
Expand Up @@ -22,7 +22,7 @@ new (int argc, VALUE *argv, VALUE klass)

rb_scan_args(argc, argv, "3*", &document, &name, &content, &rest);

Data_Get_Struct(document, xmlDoc, xml_doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, xml_doc);

node = xmlNewDocPI(
xml_doc,
Expand Down
4 changes: 2 additions & 2 deletions ext/nokogiri/xml_relax_ng.c
Expand Up @@ -23,7 +23,7 @@ validate_document(VALUE self, VALUE document)
xmlRelaxNGValidCtxtPtr valid_ctxt;

Data_Get_Struct(self, xmlRelaxNG, schema);
Data_Get_Struct(document, xmlDoc, doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);

errors = rb_ary_new();

Expand Down Expand Up @@ -127,7 +127,7 @@ from_document(int argc, VALUE *argv, VALUE klass)

scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);

Data_Get_Struct(document, xmlDoc, doc);
TypedData_Get_Struct(document, xmlDoc, &noko_xml_document_data_type, doc);
doc = doc->doc; /* In case someone passes us a node. ugh. */

if (scanned_args == 1) {
Expand Down
11 changes: 8 additions & 3 deletions ext/nokogiri/xslt_stylesheet.c
Expand Up @@ -79,7 +79,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
xmlDocPtr xml, xml_cpy;
VALUE errstr, exception;
xsltStylesheetPtr ss ;
Data_Get_Struct(xmldocobj, xmlDoc, xml);
TypedData_Get_Struct(xmldocobj, xmlDoc, &noko_xml_document_data_type, xml);

errstr = rb_str_new(0, 0);
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
Expand Down Expand Up @@ -114,7 +114,12 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
int doc_len ;
VALUE rval ;

Data_Get_Struct(xmlobj, xmlDoc, xml);
TypedData_Get_Struct(
xmlobj,
xmlDoc,
&noko_xml_document_data_type,
xml
);
TypedData_Get_Struct(
self,
nokogiriXsltStylesheetTuple,
Expand Down Expand Up @@ -265,7 +270,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)

Check_Type(paramobj, T_ARRAY);

Data_Get_Struct(xmldoc, xmlDoc, xml);
TypedData_Get_Struct(xmldoc, xmlDoc, &noko_xml_document_data_type, xml);
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);

param_len = RARRAY_LEN(paramobj);
Expand Down
41 changes: 41 additions & 0 deletions test/test_memory_leak.rb
Expand Up @@ -272,6 +272,47 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
end
end # if NOKOGIRI_GC

def test_object_space_memsize_of
require "objspace"
skip("memsize_of not defined") unless ObjectSpace.respond_to?(:memsize_of)

base_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
<root>
<child>asdf</child>
</root>
XML

more_children_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
<root>
<child>asdf</child>
<child>asdf</child>
<child>asdf</child>
</root>
XML
assert(more_children_size > base_size, "adding children should increase memsize")

attributes_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
<root>
<child a="b" c="d">asdf</child>
</root>
XML
assert(attributes_size > base_size, "adding attributes should increase memsize")

string_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
<root>
<child>asdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdfasdf</child>
</root>
XML
assert(string_size > base_size, "longer strings should increase memsize")

bigger_name_size = ObjectSpace.memsize_of(Nokogiri::XML(<<~XML))
<root>
<superduperamazingchild>asdf</superduperamazingchild>
</root>
XML
assert(bigger_name_size > base_size, "longer tags should increase memsize")
end

module MemInfo
# from https://stackoverflow.com/questions/7220896/get-current-ruby-process-memory-usage
# this is only going to work on linux
Expand Down