Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Commit

Permalink
Include line numbers from the parser.
Browse files Browse the repository at this point in the history
Neither libxml2 nor Nokogiri contains an API for setting the line numbers
for a node. When the libxml2 headers are available, the line numbers can
be set directly in the node structure.
  • Loading branch information
stevecheckoway committed Sep 2, 2018
1 parent b679cc4 commit 1b37adf
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 6 deletions.
30 changes: 24 additions & 6 deletions ext/nokogumbo/nokogumbo.c
Expand Up @@ -290,6 +290,17 @@ static xmlNsPtr lookup_or_add_ns (
#endif
}

// Store a source line number in a libxml2 node's `line` field.
//
//   node: node whose `line` field is written.
//   line: line number to record.
//
// The assignment is compiled in only when NGLIB evaluates to non-zero;
// otherwise the function body is empty and the call is a no-op.
static void set_line(xmlNodePtr node, size_t line) {
#if NGLIB
  // libxml2 treats 65535 in this field as "look elsewhere for the line
  // number" on some nodes, so values that large are never stored and the
  // node's existing `line` value is left untouched.
  if (line >= 65535)
    return;
  node->line = (unsigned short)line;
#else
  // XXX: If Nokogiri gets a `#line=` method, we'll use that.
#endif
}

// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
// at gumbo_node.
static void build_tree (
Expand Down Expand Up @@ -323,31 +334,38 @@ static void build_tree (
continue;
}
const GumboNode *gumbo_child = children->data[child_index++];
xmlNodePtr xml_child;

switch (gumbo_child->type) {
case GUMBO_NODE_DOCUMENT:
abort(); // Bug in Gumbo.

case GUMBO_NODE_TEXT:
case GUMBO_NODE_WHITESPACE:
xmlAddChild(xml_node, xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text));
xml_child = xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text);
set_line(xml_child, gumbo_child->v.text.start_pos.line);
xmlAddChild(xml_node, xml_child);
break;

case GUMBO_NODE_CDATA:
xmlAddChild(xml_node,
xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text,
(int) strlen(gumbo_child->v.text.text)));
xml_child = xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text,
(int) strlen(gumbo_child->v.text.text));
set_line(xml_child, gumbo_child->v.text.start_pos.line);
xmlAddChild(xml_node, xml_child);
break;

case GUMBO_NODE_COMMENT:
xmlAddChild(xml_node, xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text));
xml_child = xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text);
set_line(xml_child, gumbo_child->v.text.start_pos.line);
xmlAddChild(xml_node, xml_child);
break;

case GUMBO_NODE_TEMPLATE:
// XXX: Should create a template element and a new DocumentFragment
case GUMBO_NODE_ELEMENT:
{
xmlNodePtr xml_child = xmlNewDocNode(doc, NIL, BAD_CAST gumbo_child->v.element.name, NULL);
xml_child = xmlNewDocNode(doc, NIL, BAD_CAST gumbo_child->v.element.name, NULL);
set_line(xml_child, gumbo_child->v.text.start_pos.line);
if (xml_root == NIL)
xml_root = xml_child;
xmlNsPtr ns = NIL;
Expand Down
5 changes: 5 additions & 0 deletions test/test_nokogumbo.rb
Expand Up @@ -220,6 +220,11 @@ def test_document_encoding
assert_equal "Кирилические символы", doc.at('body').text.gsub(/\n\s+/,'')
end

# Parses the shared test buffer and checks the line reported for two elements.
def test_line_numbers
  doc = Nokogiri::HTML5(buffer)
  # Each element may report either its expected line or 0. The 0 case is
  # presumably what is seen when line numbers could not be recorded on the
  # node -- TODO confirm against the extension build options.
  { 'h1' => 8, 'span' => 10 }.each do |selector, expected_line|
    assert_includes [0, expected_line], doc.at(selector).line
  end
end

private

Expand Down

0 comments on commit 1b37adf

Please sign in to comment.