Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: DocumentFragment constructors all take parse options param #2399

Merged
merged 1 commit into from Dec 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -37,6 +37,7 @@ This release ends support for:
* XML::Builder blocks restore context properly when exceptions are raised. [[#2372](https://github.com/sparklemotion/nokogiri/issues/2372)] (Thanks, [@ric2b](https://github.com/ric2b) and [@rinthedev](https://github.com/rinthedev)!)
* Error recovery from in-context parsing (e.g., `Node#parse`) now always uses the correct `DocumentFragment` class. Previously `Nokogiri::HTML4::DocumentFragment` was always used, even for XML documents. [[#1158](https://github.com/sparklemotion/nokogiri/issues/1158)]
* `DocumentFragment#>` now works properly, matching a CSS selector against only the fragment roots. [[#1857](https://github.com/sparklemotion/nokogiri/issues/1857)]
* `XML::DocumentFragment#errors` now correctly contains any parsing errors encountered. Previously this was always empty. (Note that `HTML::DocumentFragment#errors` did not need to be fixed.)
* [CRuby] Fix memory leak in `Document#canonicalize` when inclusive namespaces are passed in. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `Document#canonicalize` when an argument type error is raised. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `EncodingHandler` where iconv handlers were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
Expand All @@ -48,6 +49,7 @@ This release ends support for:

### Improved

* `{XML,HTML4}::DocumentFragment` constructors all now take an optional parse options parameter or block (similar to Document constructors). [[#1692](https://github.com/sparklemotion/nokogiri/issues/1692)] (Thanks, [@JackMc](https://github.com/JackMc)!)
* [CRuby] XML::Reader#encoding will return the encoding detected by the parser when it's not passed to the constructor. [[#980](https://github.com/sparklemotion/nokogiri/issues/980)]
* [CRuby] Handle abruptly-closed HTML comments as WHATWG recommends for browsers. (Thanks to HackerOne user [tehryanx](https://hackerone.com/tehryanx?type=user) for reporting this!)
* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
Expand Down
7 changes: 4 additions & 3 deletions ext/java/nokogiri/XmlDocumentFragment.java
Expand Up @@ -17,6 +17,7 @@
import org.jruby.RubyString;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
Expand Down Expand Up @@ -46,9 +47,9 @@ public class XmlDocumentFragment extends XmlNode
super(ruby, klazz);
}

@JRubyMethod(name = "new", meta = true, required = 1, optional = 2)
@JRubyMethod(name = "new", meta = true, required = 1, optional = 3)
public static IRubyObject
rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args)
rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args, Block block)
{
if (args.length < 1) {
throw context.runtime.newArgumentError(args.length, 1);
Expand All @@ -73,7 +74,7 @@ public class XmlDocumentFragment extends XmlNode
fragment.setDocument(context, doc);
fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());

Helpers.invoke(context, fragment, "initialize", args);
Helpers.invoke(context, fragment, "initialize", args, block);
return fragment;
}

Expand Down
Expand Up @@ -57,6 +57,6 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler
warning(String domain, String key, XMLParseException e) throws XMLParseException
{
if (!nowarning) { throw e; }
if (!usesNekoHtml(domain)) { addError(e); }
else { addError(e); }
}
}
2 changes: 0 additions & 2 deletions ext/nokogiri/xml_document_fragment.c
Expand Up @@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass)
rb_node = noko_xml_node_wrap(klass, node);
rb_obj_call_init(rb_node, argc, argv);

if (rb_block_given_p()) { rb_yield(rb_node); }

return rb_node;
}

Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/html4.rb
Expand Up @@ -26,8 +26,8 @@ def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT

####
# Parse a fragment from +string+ in to a NodeSet.
def fragment(string, encoding = nil)
HTML4::DocumentFragment.parse(string, encoding)
def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
HTML4::DocumentFragment.parse(string, encoding, options, &block)
end
end

Expand Down
1 change: 0 additions & 1 deletion lib/nokogiri/html4/document.rb
Expand Up @@ -159,7 +159,6 @@ class << self
# Nokogiri::XML::ParseOptions.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

yield options if block_given?

url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
Expand Down
13 changes: 8 additions & 5 deletions lib/nokogiri/html4/document_fragment.rb
Expand Up @@ -5,7 +5,7 @@ module HTML4
class DocumentFragment < Nokogiri::XML::DocumentFragment
####
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
def self.parse(tags, encoding = nil)
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
doc = HTML4::Document.new

encoding ||= if tags.respond_to?(:encoding)
Expand All @@ -21,15 +21,18 @@ def self.parse(tags, encoding = nil)

doc.encoding = encoding

new(doc, tags)
new(doc, tags, nil, options, &block)
end

def initialize(document, tags = nil, ctx = nil)
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
return self unless tags

options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?

if ctx
preexisting_errors = document.errors.dup
node_set = ctx.parse("<div>#{tags}</div>")
node_set = ctx.parse("<div>#{tags}</div>", options)
node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
self.errors = document.errors - preexisting_errors
else
Expand All @@ -40,7 +43,7 @@ def initialize(document, tags = nil, ctx = nil)
"/html/body/node()"
end

temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
temp_doc.xpath(path).each { |child| child.parent = self }
self.errors = temp_doc.errors
end
Expand Down
5 changes: 2 additions & 3 deletions lib/nokogiri/xml.rb
Expand Up @@ -22,7 +22,6 @@ class << self
# Nokogiri::XML::Reader for mor information
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
# Give the options to the user
yield options if block_given?

if string_or_io.respond_to?(:read)
Expand All @@ -39,8 +38,8 @@ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML,

####
# Parse a fragment from +string+ in to a NodeSet.
def fragment(string)
XML::DocumentFragment.parse(string)
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
XML::DocumentFragment.parse(string, options, &block)
end
end
end
Expand Down
1 change: 0 additions & 1 deletion lib/nokogiri/xml/document.rb
Expand Up @@ -47,7 +47,6 @@ class Document < Nokogiri::XML::Node
#
def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

yield options if block_given?

url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
Expand Down
28 changes: 15 additions & 13 deletions lib/nokogiri/xml/document_fragment.rb
Expand Up @@ -3,26 +3,36 @@
module Nokogiri
module XML
class DocumentFragment < Nokogiri::XML::Node
####
# Create a Nokogiri::XML::DocumentFragment from +tags+
def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
new(XML::Document.new, tags, nil, options, &block)
end

##
# Create a new DocumentFragment from +tags+.
#
# If +ctx+ is present, it is used as a context node for the
# subtree created, e.g., namespaces will be resolved relative
# to +ctx+.
def initialize(document, tags = nil, ctx = nil)
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
return self unless tags

options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?

children = if ctx
# Fix for issue#490
if Nokogiri.jruby?
# fix for issue #770
ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>").children
ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
else
ctx.parse(tags)
ctx.parse(tags, options)
end
else
XML::Document.parse("<root>#{tags}</root>") \
.xpath("/root/node()")
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
self.errors = wrapper_doc.errors
wrapper_doc.xpath("/root/node()")
end
children.each { |child| child.parent = self }
end
Expand Down Expand Up @@ -125,14 +135,6 @@ def search(*rules)

alias_method :serialize, :to_s

class << self
####
# Create a Nokogiri::XML::DocumentFragment from +tags+
def parse(tags)
new(XML::Document.new, tags)
end
end

# A list of Nokogiri::XML::SyntaxError found when parsing a document
def errors
document.errors
Expand Down
7 changes: 1 addition & 6 deletions lib/nokogiri/xml/node.rb
Expand Up @@ -359,8 +359,6 @@ def namespace=(ns)
# passed to it, allowing more convenient modification of the parser options.
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

# give options to user
yield options if block_given?

# call c extension
Expand Down Expand Up @@ -946,10 +944,7 @@ def parse(string_or_io, options = nil)
end

options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
if Integer === options
options = Nokogiri::XML::ParseOptions.new(options)
end
# Give the options to the user
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?

contents = if string_or_io.respond_to?(:read)
Expand Down
141 changes: 141 additions & 0 deletions test/html4/test_document_fragment.rb
Expand Up @@ -306,6 +306,147 @@ def test_dup_should_create_an_html_document_fragment
assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate)
end

describe "parse options" do
let(:html4_default) do
Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML)
end

let(:html4_strict) do
Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover
end

let(:input) { "<div>foo</div" }

it "sets the test up correctly" do
assert(html4_strict.strict?)
end

describe "HTML4.fragment" do
it "has sane defaults" do
frag = Nokogiri::HTML4.fragment(input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4.fragment(input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4.fragment(input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4.fragment(input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4.fragment(input, &:norecover)
end
end
end

describe "HTML4::DocumentFragment.parse" do
it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.parse(input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.parse(input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.parse(input, &:norecover)
end
end
end

describe "HTML4::DocumentFragment.new" do
describe "without a context node" do
it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, &:norecover)
end
end
end

describe "with a context node" do
let(:document) { Nokogiri::HTML4::Document.parse("<context></context>") }
let(:context_node) { document.at_css("context") }

it "has sane defaults" do
frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)
end

it "accepts options" do
frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_default)
assert_equal("<div>foo</div>", frag.to_html)
refute_empty(frag.errors)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_strict)
end
end

it "takes a config block" do
default_config = nil
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node) do |config|
default_config = config
end
refute(default_config.strict?)

assert_raises(Nokogiri::SyntaxError) do
Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, &:norecover)
end
end
end
end
end

describe "subclassing" do
let(:klass) do
Class.new(Nokogiri::HTML::DocumentFragment) do
Expand Down