Skip to content

Commit

Permalink
feat: DocumentFragment constructors all take parse options param
Browse files Browse the repository at this point in the history
Similar to Document constructors, we can now pass in parse options as
a parameter or via a config block.

Fixes #1692
Fixes #1844

Co-authored-by: Jack McCracken <jack.mccracken@shopify.com>
  • Loading branch information
flavorjones and JackMc committed Dec 24, 2021
1 parent 5fcea42 commit 71bd4b2
Show file tree
Hide file tree
Showing 10 changed files with 333 additions and 28 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -37,6 +37,7 @@ This release ends support for:
* XML::Builder blocks restore context properly when exceptions are raised. [[#2372](https://github.com/sparklemotion/nokogiri/issues/2372)] (Thanks, [@ric2b](https://github.com/ric2b) and [@rinthedev](https://github.com/rinthedev)!)
* Error recovery from in-context parsing (e.g., `Node#parse`) now always uses the correct `DocumentFragment` class. Previously `Nokogiri::HTML4::DocumentFragment` was always used, even for XML documents. [[#1158](https://github.com/sparklemotion/nokogiri/issues/1158)]
* `DocumentFragment#>` now works properly, matching a CSS selector against only the fragment roots. [[#1857](https://github.com/sparklemotion/nokogiri/issues/1857)]
* `XML::DocumentFragment#errors` now correctly contains any parsing errors encountered. Previously this was always empty. (Note that `HTML::DocumentFragment#errors` did not need to be fixed.)
* [CRuby] Fix memory leak in `Document#canonicalize` when inclusive namespaces are passed in. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `Document#canonicalize` when an argument type error is raised. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `EncodingHandler` where iconv handlers were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
Expand All @@ -48,6 +49,7 @@ This release ends support for:

### Improved

* `{XML,HTML4}::DocumentFragment` constructors all now take an optional parse options parameter or block (similar to Document constructors). [[#1692](https://github.com/sparklemotion/nokogiri/issues/1692)] (Thanks, [@JackMc](https://github.com/JackMc)!)
* [CRuby] XML::Reader#encoding will return the encoding detected by the parser when it's not passed to the constructor. [[#980](https://github.com/sparklemotion/nokogiri/issues/980)]
* [CRuby] Handle abruptly-closed HTML comments as WHATWG recommends for browsers. (Thanks to HackerOne user [tehryanx](https://hackerone.com/tehryanx?type=user) for reporting this!)
* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
Expand Down
7 changes: 4 additions & 3 deletions ext/java/nokogiri/XmlDocumentFragment.java
Expand Up @@ -17,6 +17,7 @@
import org.jruby.RubyString;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
Expand Down Expand Up @@ -46,9 +47,9 @@ public class XmlDocumentFragment extends XmlNode
super(ruby, klazz);
}

@JRubyMethod(name = "new", meta = true, required = 1, optional = 2)
@JRubyMethod(name = "new", meta = true, required = 1, optional = 3)
public static IRubyObject
rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args)
rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args, Block block)
{
if (args.length < 1) {
throw context.runtime.newArgumentError(args.length, 1);
Expand All @@ -73,7 +74,7 @@ public class XmlDocumentFragment extends XmlNode
fragment.setDocument(context, doc);
fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());

Helpers.invoke(context, fragment, "initialize", args);
Helpers.invoke(context, fragment, "initialize", args, block);
return fragment;
}

Expand Down
Expand Up @@ -57,6 +57,6 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler
warning(String domain, String key, XMLParseException e) throws XMLParseException
{
if (!nowarning) { throw e; }
if (!usesNekoHtml(domain)) { addError(e); }
else { addError(e); }
}
}
2 changes: 0 additions & 2 deletions ext/nokogiri/xml_document_fragment.c
Expand Up @@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass)
rb_node = noko_xml_node_wrap(klass, node);
rb_obj_call_init(rb_node, argc, argv);

if (rb_block_given_p()) { rb_yield(rb_node); }

return rb_node;
}

Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/html4.rb
Expand Up @@ -26,8 +26,8 @@ def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT

####
# Parse a fragment from +string+ in to a NodeSet.
def fragment(string, encoding = nil)
HTML4::DocumentFragment.parse(string, encoding)
def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
HTML4::DocumentFragment.parse(string, encoding, options, &block)
end
end

Expand Down
13 changes: 8 additions & 5 deletions lib/nokogiri/html4/document_fragment.rb
Expand Up @@ -5,7 +5,7 @@ module HTML4
class DocumentFragment < Nokogiri::XML::DocumentFragment
####
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
def self.parse(tags, encoding = nil)
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
doc = HTML4::Document.new

encoding ||= if tags.respond_to?(:encoding)
Expand All @@ -21,15 +21,18 @@ def self.parse(tags, encoding = nil)

doc.encoding = encoding

new(doc, tags)
new(doc, tags, nil, options, &block)
end

def initialize(document, tags = nil, ctx = nil)
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
return self unless tags

options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?

if ctx
preexisting_errors = document.errors.dup
node_set = ctx.parse("<div>#{tags}</div>")
node_set = ctx.parse("<div>#{tags}</div>", options)
node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
self.errors = document.errors - preexisting_errors
else
Expand All @@ -40,7 +43,7 @@ def initialize(document, tags = nil, ctx = nil)
"/html/body/node()"
end

temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
temp_doc.xpath(path).each { |child| child.parent = self }
self.errors = temp_doc.errors
end
Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/xml.rb
Expand Up @@ -39,8 +39,8 @@ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML,

####
# Parse a fragment from +string+ in to a NodeSet.
def fragment(string)
XML::DocumentFragment.parse(string)
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
XML::DocumentFragment.parse(string, options, &block)
end
end
end
Expand Down
28 changes: 15 additions & 13 deletions lib/nokogiri/xml/document_fragment.rb
Expand Up @@ -3,26 +3,36 @@
module Nokogiri
module XML
class DocumentFragment < Nokogiri::XML::Node
####
# Create a Nokogiri::XML::DocumentFragment from +tags+
def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
new(XML::Document.new, tags, nil, options, &block)
end

##
# Create a new DocumentFragment from +tags+.
#
# If +ctx+ is present, it is used as a context node for the
# subtree created, e.g., namespaces will be resolved relative
# to +ctx+.
def initialize(document, tags = nil, ctx = nil)
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
return self unless tags

options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?

children = if ctx
# Fix for issue#490
if Nokogiri.jruby?
# fix for issue #770
ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
else
ctx.parse(tags)
ctx.parse(tags, options)
end
else
XML::Document.parse("<root>#{tags}</root>") \
.xpath("/root/node()")
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
self.errors = wrapper_doc.errors
wrapper_doc.xpath("/root/node()")
end
children.each { |child| child.parent = self }
end
Expand Down Expand Up @@ -125,14 +135,6 @@ def search(*rules)

alias_method :serialize, :to_s

class << self
####
# Create a Nokogiri::XML::DocumentFragment from +tags+
def parse(tags)
new(XML::Document.new, tags)
end
end

# A list of Nokogiri::XML::SyntaxError found when parsing a document
def errors
document.errors
Expand Down
149 changes: 149 additions & 0 deletions test/html4/test_document_fragment.rb
Expand Up @@ -306,6 +306,155 @@ def test_dup_should_create_an_html_document_fragment
assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate)
end

describe "parse options" do
let(:html4_default) do
Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML)
end

let(:html4_strict) do
Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover
end

let(:input) { "<div>foo</div" }

it "sets the test up correctly" do
assert(html4_strict.strict?)
end

describe "HTML4.fragment" do
it "has sane defaults" do
frag = Nokogiri::HTML4.fragment(input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4.fragment(input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4.fragment(input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4.fragment(input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4.fragment(input) do |config|
config.norecover
end
end
end
end

describe "HTML4::DocumentFragment.parse" do
it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.parse(input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.parse(input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.parse(input) do |config|
config.norecover
end
end
end
end

describe "HTML4::DocumentFragment.new" do
describe "without a context node" do
it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config|
config.norecover
end
end
end
end

describe "with a context node" do
let (:document) { Nokogiri::HTML4::Document.parse("<context></context>") }
let (:context_node) { document.at_css("context") }

it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node) do |config|
config.norecover
end
end
end
end
end
end

describe "subclassing" do
let(:klass) do
Class.new(Nokogiri::HTML::DocumentFragment) do
Expand Down

0 comments on commit 71bd4b2

Please sign in to comment.