diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5085fd3294..9f7ea16d0c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -37,6 +37,7 @@ This release ends support for:
* XML::Builder blocks restore context properly when exceptions are raised. [[#2372](https://github.com/sparklemotion/nokogiri/issues/2372)] (Thanks, [@ric2b](https://github.com/ric2b) and [@rinthedev](https://github.com/rinthedev)!)
* Error recovery from in-context parsing (e.g., `Node#parse`) now always uses the correct `DocumentFragment` class. Previously `Nokogiri::HTML4::DocumentFragment` was always used, even for XML documents. [[#1158](https://github.com/sparklemotion/nokogiri/issues/1158)]
* `DocumentFragment#>` now works properly, matching a CSS selector against only the fragment roots. [[#1857](https://github.com/sparklemotion/nokogiri/issues/1857)]
+* `XML::DocumentFragment#errors` now correctly contains any parsing errors encountered. Previously this was always empty. (Note that `HTML::DocumentFragment#errors` did not need to be fixed.)
* [CRuby] Fix memory leak in `Document#canonicalize` when inclusive namespaces are passed in. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `Document#canonicalize` when an argument type error is raised. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
* [CRuby] Fix memory leak in `EncodingHandler` where iconv handlers were not being cleaned up. [[#2345](https://github.com/sparklemotion/nokogiri/issues/2345)]
@@ -48,6 +49,7 @@ This release ends support for:
### Improved
+* `{XML,HTML4}::DocumentFragment` constructors all now take an optional parse options parameter or block (similar to Document constructors). [[#1692](https://github.com/sparklemotion/nokogiri/issues/1692)] (Thanks, [@JackMc](https://github.com/JackMc)!)
* [CRuby] XML::Reader#encoding will return the encoding detected by the parser when it's not passed to the constructor. [[#980](https://github.com/sparklemotion/nokogiri/issues/980)]
* [CRuby] Handle abruptly-closed HTML comments as WHATWG recommends for browsers. (Thanks to HackerOne user [tehryanx](https://hackerone.com/tehryanx?type=user) for reporting this!)
* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java
index 7d8572605f..4281df113e 100644
--- a/ext/java/nokogiri/XmlDocumentFragment.java
+++ b/ext/java/nokogiri/XmlDocumentFragment.java
@@ -17,6 +17,7 @@
import org.jruby.RubyString;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
+import org.jruby.runtime.Block;
import org.jruby.runtime.Helpers;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
@@ -46,9 +47,9 @@ public class XmlDocumentFragment extends XmlNode
super(ruby, klazz);
}
- @JRubyMethod(name = "new", meta = true, required = 1, optional = 2)
+ @JRubyMethod(name = "new", meta = true, required = 1, optional = 3)
public static IRubyObject
- rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args)
+ rbNew(ThreadContext context, IRubyObject cls, IRubyObject[] args, Block block)
{
if (args.length < 1) {
throw context.runtime.newArgumentError(args.length, 1);
@@ -73,7 +74,7 @@ public class XmlDocumentFragment extends XmlNode
fragment.setDocument(context, doc);
fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment());
- Helpers.invoke(context, fragment, "initialize", args);
+ Helpers.invoke(context, fragment, "initialize", args, block);
return fragment;
}
diff --git a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
index 10cbc6f441..78118d1de7 100644
--- a/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
+++ b/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java
@@ -57,6 +57,6 @@ public class NokogiriStrictErrorHandler extends NokogiriErrorHandler
warning(String domain, String key, XMLParseException e) throws XMLParseException
{
if (!nowarning) { throw e; }
- if (!usesNekoHtml(domain)) { addError(e); }
+ else { addError(e); }
}
}
diff --git a/ext/nokogiri/xml_document_fragment.c b/ext/nokogiri/xml_document_fragment.c
index f509815fda..81639576d7 100644
--- a/ext/nokogiri/xml_document_fragment.c
+++ b/ext/nokogiri/xml_document_fragment.c
@@ -28,8 +28,6 @@ new (int argc, VALUE *argv, VALUE klass)
rb_node = noko_xml_node_wrap(klass, node);
rb_obj_call_init(rb_node, argc, argv);
- if (rb_block_given_p()) { rb_yield(rb_node); }
-
return rb_node;
}
diff --git a/lib/nokogiri/html4.rb b/lib/nokogiri/html4.rb
index fe543d6d88..e755391a9c 100644
--- a/lib/nokogiri/html4.rb
+++ b/lib/nokogiri/html4.rb
@@ -26,8 +26,8 @@ def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT
####
# Parse a fragment from +string+ in to a NodeSet.
- def fragment(string, encoding = nil)
- HTML4::DocumentFragment.parse(string, encoding)
+ def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
+ HTML4::DocumentFragment.parse(string, encoding, options, &block)
end
end
diff --git a/lib/nokogiri/html4/document.rb b/lib/nokogiri/html4/document.rb
index ed0ac11d2a..40ff91740e 100644
--- a/lib/nokogiri/html4/document.rb
+++ b/lib/nokogiri/html4/document.rb
@@ -159,7 +159,6 @@ class << self
# Nokogiri::XML::ParseOptions.
def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
-
yield options if block_given?
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
diff --git a/lib/nokogiri/html4/document_fragment.rb b/lib/nokogiri/html4/document_fragment.rb
index 838a4f776a..9bfddef5da 100644
--- a/lib/nokogiri/html4/document_fragment.rb
+++ b/lib/nokogiri/html4/document_fragment.rb
@@ -5,7 +5,7 @@ module HTML4
class DocumentFragment < Nokogiri::XML::DocumentFragment
####
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
- def self.parse(tags, encoding = nil)
+ def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
doc = HTML4::Document.new
encoding ||= if tags.respond_to?(:encoding)
@@ -21,15 +21,18 @@ def self.parse(tags, encoding = nil)
doc.encoding = encoding
- new(doc, tags)
+ new(doc, tags, nil, options, &block)
end
- def initialize(document, tags = nil, ctx = nil)
+ def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
return self unless tags
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
+ yield options if block_given?
+
if ctx
preexisting_errors = document.errors.dup
- node_set = ctx.parse("
#{tags}
")
+ node_set = ctx.parse("#{tags}
", options)
node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
self.errors = document.errors - preexisting_errors
else
@@ -40,7 +43,7 @@ def initialize(document, tags = nil, ctx = nil)
"/html/body/node()"
end
- temp_doc = HTML4::Document.parse("#{tags}", nil, document.encoding)
+ temp_doc = HTML4::Document.parse("#{tags}", nil, document.encoding, options)
temp_doc.xpath(path).each { |child| child.parent = self }
self.errors = temp_doc.errors
end
diff --git a/lib/nokogiri/xml.rb b/lib/nokogiri/xml.rb
index 0b01daefa2..2bcbc508c2 100644
--- a/lib/nokogiri/xml.rb
+++ b/lib/nokogiri/xml.rb
@@ -22,7 +22,6 @@ class << self
# Nokogiri::XML::Reader for mor information
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
- # Give the options to the user
yield options if block_given?
if string_or_io.respond_to?(:read)
@@ -39,8 +38,8 @@ def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML,
####
# Parse a fragment from +string+ in to a NodeSet.
- def fragment(string)
- XML::DocumentFragment.parse(string)
+ def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
+ XML::DocumentFragment.parse(string, options, &block)
end
end
end
diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb
index fe3fea9754..24d7720d26 100644
--- a/lib/nokogiri/xml/document.rb
+++ b/lib/nokogiri/xml/document.rb
@@ -47,7 +47,6 @@ class Document < Nokogiri::XML::Node
#
def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
-
yield options if block_given?
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
diff --git a/lib/nokogiri/xml/document_fragment.rb b/lib/nokogiri/xml/document_fragment.rb
index 36070ac4e4..b5b3e6a667 100644
--- a/lib/nokogiri/xml/document_fragment.rb
+++ b/lib/nokogiri/xml/document_fragment.rb
@@ -3,26 +3,36 @@
module Nokogiri
module XML
class DocumentFragment < Nokogiri::XML::Node
+ ####
+ # Create a Nokogiri::XML::DocumentFragment from +tags+
+ def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
+ new(XML::Document.new, tags, nil, options, &block)
+ end
+
##
# Create a new DocumentFragment from +tags+.
#
# If +ctx+ is present, it is used as a context node for the
# subtree created, e.g., namespaces will be resolved relative
# to +ctx+.
- def initialize(document, tags = nil, ctx = nil)
+ def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
return self unless tags
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
+ yield options if block_given?
+
children = if ctx
# Fix for issue#490
if Nokogiri.jruby?
# fix for issue #770
- ctx.parse("#{tags}").children
+ ctx.parse("#{tags}", options).children
else
- ctx.parse(tags)
+ ctx.parse(tags, options)
end
else
- XML::Document.parse("#{tags}") \
- .xpath("/root/node()")
+ wrapper_doc = XML::Document.parse("#{tags}", nil, nil, options)
+ self.errors = wrapper_doc.errors
+ wrapper_doc.xpath("/root/node()")
end
children.each { |child| child.parent = self }
end
@@ -125,14 +135,6 @@ def search(*rules)
alias_method :serialize, :to_s
- class << self
- ####
- # Create a Nokogiri::XML::DocumentFragment from +tags+
- def parse(tags)
- new(XML::Document.new, tags)
- end
- end
-
# A list of Nokogiri::XML::SyntaxError found when parsing a document
def errors
document.errors
diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb
index 95cfbe4dd1..e6c7b9819e 100644
--- a/lib/nokogiri/xml/node.rb
+++ b/lib/nokogiri/xml/node.rb
@@ -359,8 +359,6 @@ def namespace=(ns)
# passed to it, allowing more convenient modification of the parser options.
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
-
- # give options to user
yield options if block_given?
# call c extension
@@ -946,10 +944,7 @@ def parse(string_or_io, options = nil)
end
options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
- if Integer === options
- options = Nokogiri::XML::ParseOptions.new(options)
- end
- # Give the options to the user
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
yield options if block_given?
contents = if string_or_io.respond_to?(:read)
diff --git a/test/html4/test_document_fragment.rb b/test/html4/test_document_fragment.rb
index a659d1445e..d4df392eaf 100644
--- a/test/html4/test_document_fragment.rb
+++ b/test/html4/test_document_fragment.rb
@@ -306,6 +306,147 @@ def test_dup_should_create_an_html_document_fragment
assert_instance_of(Nokogiri::HTML::DocumentFragment, duplicate)
end
+ describe "parse options" do
+ let(:html4_default) do
+ Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML)
+ end
+
+ let(:html4_strict) do
+ Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_HTML).norecover
+ end
+
+ let(:input) { "foo
foo", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::HTML4.fragment(input, nil, html4_default)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4.fragment(input, nil, html4_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::HTML4.fragment(input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4.fragment(input, &:norecover)
+ end
+ end
+ end
+
+ describe "HTML4::DocumentFragment.parse" do
+ it "has sane defaults" do
+ frag = Nokogiri::HTML4::DocumentFragment.parse(input)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_default)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.parse(input, nil, html4_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::HTML4::DocumentFragment.parse(input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.parse(input, &:norecover)
+ end
+ end
+ end
+
+ describe "HTML4::DocumentFragment.new" do
+ describe "without a context node" do
+ it "has sane defaults" do
+ frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_default)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, nil, html4_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.new(Nokogiri::HTML4::Document.new, input, &:norecover)
+ end
+ end
+ end
+
+ describe "with a context node" do
+ let(:document) { Nokogiri::HTML4::Document.parse("") }
+ let(:context_node) { document.at_css("context") }
+
+ it "has sane defaults" do
+ frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_default)
+ assert_equal("foo
", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, html4_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::HTML4::DocumentFragment.new(document, input, context_node) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::HTML4::DocumentFragment.new(document, input, context_node, &:norecover)
+ end
+ end
+ end
+ end
+ end
+
describe "subclassing" do
let(:klass) do
Class.new(Nokogiri::HTML::DocumentFragment) do
diff --git a/test/xml/test_document_fragment.rb b/test/xml/test_document_fragment.rb
index 9069ac516e..073ea1d088 100644
--- a/test/xml/test_document_fragment.rb
+++ b/test/xml/test_document_fragment.rb
@@ -332,6 +332,147 @@ def test_for_libxml_in_context_memory_badness_when_encountering_encoding_errors
end
end
+ describe "parse options" do
+ let(:xml_default) do
+ Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_XML)
+ end
+
+ let(:xml_strict) do
+ Nokogiri::XML::ParseOptions.new(Nokogiri::XML::ParseOptions::DEFAULT_XML).norecover
+ end
+
+ let(:input) { "foofoo", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::XML.fragment(input, xml_default)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML.fragment(input, xml_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::XML.fragment(input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML.fragment(input, &:norecover)
+ end
+ end
+ end
+
+ describe "XML::DocumentFragment.parse" do
+ it "has sane defaults" do
+ frag = Nokogiri::XML::DocumentFragment.parse(input)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::XML::DocumentFragment.parse(input, xml_default)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.parse(input, xml_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::XML::DocumentFragment.parse(input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.parse(input, &:norecover)
+ end
+ end
+ end
+
+ describe "XML::DocumentFragment.new" do
+ describe "without a context node" do
+ it "has sane defaults" do
+ frag = Nokogiri::XML::DocumentFragment.new(Nokogiri::XML::Document.new, input)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::XML::DocumentFragment.new(Nokogiri::XML::Document.new, input, nil, xml_default)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.new(Nokogiri::XML::Document.new, input, nil, xml_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::XML::DocumentFragment.new(Nokogiri::XML::Document.new, input) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.new(Nokogiri::XML::Document.new, input, &:norecover)
+ end
+ end
+ end
+
+ describe "with a context node" do
+ let(:document) { Nokogiri::XML::Document.parse("") }
+ let(:context_node) { document.at_css("context") }
+
+ it "has sane defaults" do
+ frag = Nokogiri::XML::DocumentFragment.new(document, input, context_node)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+ end
+
+ it "accepts options" do
+ frag = Nokogiri::XML::DocumentFragment.new(document, input, context_node, xml_default)
+ assert_equal("foo", frag.to_html)
+ refute_empty(frag.errors)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.new(document, input, context_node, xml_strict)
+ end
+ end
+
+ it "takes a config block" do
+ default_config = nil
+ Nokogiri::XML::DocumentFragment.new(document, input, context_node) do |config|
+ default_config = config
+ end
+ refute(default_config.strict?)
+
+ assert_raises(Nokogiri::SyntaxError) do
+ Nokogiri::XML::DocumentFragment.new(document, input, context_node, &:norecover)
+ end
+ end
+ end
+ end
+ end
+
describe "subclassing" do
let(:klass) do
Class.new(Nokogiri::XML::DocumentFragment) do