diff --git a/CHANGELOG.md b/CHANGELOG.md index eedb439931..2408760a24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## next / unreleased + +### Changed + +* Introduce `Nokogiri::XML::ParseOptions::DEFAULT_XSLT` which adds the libxslt-preferred options of `NOENT | DTDLOAD | DTDATTR | NOCDATA` to `ParseOptions::DEFAULT_XML`. +* `Nokogiri.XSLT` parses the stylesheet using `ParseOptions::DEFAULT_XSLT`, which should make some edge-case XSL transformations match libxslt's default behavior. [[#1940](https://github.com/sparklemotion/nokogiri/issues/1940)] + + ## 1.11.3 / 2021-04-07 ### Fixed diff --git a/lib/nokogiri/xml/parse_options.rb b/lib/nokogiri/xml/parse_options.rb index a266d5ba07..36186ab7b9 100644 --- a/lib/nokogiri/xml/parse_options.rb +++ b/lib/nokogiri/xml/parse_options.rb @@ -71,6 +71,8 @@ class ParseOptions # the default options used for parsing XML documents DEFAULT_XML = RECOVER | NONET + # the default options used for parsing XSLT stylesheets + DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA # the default options used for parsing HTML documents DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET # the default options used for parsing XML schemas diff --git a/lib/nokogiri/xslt.rb b/lib/nokogiri/xslt.rb index 50cdcaf9ae..b6c9442008 100644 --- a/lib/nokogiri/xslt.rb +++ b/lib/nokogiri/xslt.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true -require 'nokogiri/xslt/stylesheet' +require "nokogiri/xslt/stylesheet" module Nokogiri class << self @@ -22,32 +22,32 @@ module XSLT class << self ### # Parse the stylesheet in +string+, register any +modules+ - def parse string, modules = {} + def parse(string, modules = {}) modules.each do |url, klass| - XSLT.register url, klass + XSLT.register(url, klass) end + doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT) if Nokogiri.jruby? - Stylesheet.parse_stylesheet_doc(XML.parse(string), string) + Stylesheet.parse_stylesheet_doc(doc, string) else - Stylesheet.parse_stylesheet_doc(XML.parse(string)) + Stylesheet.parse_stylesheet_doc(doc) end end ### # Quote parameters in +params+ for stylesheet safety - def quote_params params + def quote_params(params) parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup - parray.each_with_index do |v,i| - if i % 2 > 0 - parray[i]= - if v =~ /'/ - "concat('#{ v.gsub(/'/, %q{', "'", '}) }')" - else - "'#{v}'"; - end + parray.each_with_index do |v, i| + parray[i] = if i % 2 > 0 + if v =~ /'/ + "concat('#{v.gsub(/'/, %q{', "'", '})}')" + else + "'#{v}'" + end else - parray[i] = v.to_s + v.to_s end end parray.flatten diff --git a/lib/nokogiri/xslt/stylesheet.rb b/lib/nokogiri/xslt/stylesheet.rb index acdd9cb3f1..4d445d7946 100644 --- a/lib/nokogiri/xslt/stylesheet.rb +++ b/lib/nokogiri/xslt/stylesheet.rb @@ -18,7 +18,7 @@ class Stylesheet # Apply an XSLT stylesheet to an XML::Document. # +params+ is an array of strings used as XSLT parameters. # returns serialized document - def apply_to document, params = [] + def apply_to(document, params = []) serialize(transform(document, params)) end end diff --git a/test/test_xslt_transforms.rb b/test/test_xslt_transforms.rb index 708215dc80..175c9920e3 100644 --- a/test/test_xslt_transforms.rb +++ b/test/test_xslt_transforms.rb @@ -1,372 +1,404 @@ # frozen_string_literal: true require "helper" -class TestXsltTransforms < Nokogiri::TestCase - def setup - super - @doc = Nokogiri::XML(File.open(XML_FILE)) - end +class Nokogiri::TestCase + describe Nokogiri::XSLT::Stylesheet do + let(:doc) { Nokogiri::XML(File.open(XML_FILE)) } - def test_class_methods - style = Nokogiri::XSLT(File.read(XSLT_FILE)) + def test_class_methods + style = Nokogiri::XSLT(File.read(XSLT_FILE)) - assert(result = style.apply_to(@doc, ['title', '"Grandma"'])) - assert_match(%r{

Grandma

}, result) - end + assert(result = style.apply_to(doc, ["title", '"Grandma"'])) + assert_match(%r{

Grandma

}, result) + end - def test_transform - assert(style = Nokogiri::XSLT.parse(File.read(XSLT_FILE))) - - assert(result = style.apply_to(@doc, ['title', '"Booyah"'])) - assert_match(%r{

Booyah

}, result) - assert_match(%r{}, result) - assert_match(%r{}, result) - assert_match(%r{}, result) - assert_match(%r{}, result) - assert_match(%r{EMP0003}, result) - assert_match(%r{Margaret Martin}, result) - assert_match(%r{Computer Specialist}, result) - assert_match(%r{100,000}, result) - assert_no_match(/Dallas|Texas/, result) - assert_no_match(/Female/, result) - - assert(result = style.apply_to(@doc, ['title', '"Grandma"'])) - assert_match(%r{

Grandma

}, result) - - assert(result = style.apply_to(@doc)) - assert_match(%r{

|

}, result) - end + def test_transform + assert(style = Nokogiri::XSLT.parse(File.read(XSLT_FILE))) + + assert(result = style.apply_to(doc, ["title", '"Booyah"'])) + assert_match(%r{

Booyah

}, result) + assert_match(%r{}, result) + assert_match(%r{}, result) + assert_match(%r{}, result) + assert_match(%r{}, result) + assert_match(%r{EMP0003}, result) + assert_match(%r{Margaret Martin}, result) + assert_match(%r{Computer Specialist}, result) + assert_match(%r{100,000}, result) + assert_no_match(/Dallas|Texas/, result) + assert_no_match(/Female/, result) + + assert(result = style.apply_to(doc, ["title", '"Grandma"'])) + assert_match(%r{

Grandma

}, result) + + assert(result = style.apply_to(doc)) + assert_match(%r{

|

}, result) + end - def test_xml_declaration - input_xml = <<~EOS - - - My Report - - EOS - - input_xsl = <<~EOS - - - - - - - <xsl:value-of select="report/title"/> - - -

- - -
-
- EOS + def test_xml_declaration + input_xml = <<~EOS + + + My Report + + EOS + + input_xsl = <<~EOS + + + + + + + <xsl:value-of select="report/title"/> + + +

+ + +
+
+ EOS - require 'nokogiri' + require "nokogiri" - xml = ::Nokogiri::XML(input_xml) - xsl = ::Nokogiri::XSLT(input_xsl) + xml = ::Nokogiri::XML(input_xml) + xsl = ::Nokogiri::XSLT(input_xsl) - assert_includes(xsl.apply_to(xml), '') - end + assert_includes(xsl.apply_to(xml), '') + end - def test_transform_with_output_style - xslt = "" - xslt = if Nokogiri.jruby? - Nokogiri::XSLT(<<~eoxslt) - + def test_transform_with_output_style + xslt = "" + xslt = if Nokogiri.jruby? + Nokogiri::XSLT(<<~eoxslt) + - - - - - - - - - - - - - - - - - - - + + - - eoxslt - else - Nokogiri::XSLT(<<~eoxslt) - + - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + - - eoxslt + + eoxslt + else + Nokogiri::XSLT(<<~eoxslt) + + + + + + + + + + + + + + + + + + + + + + + + + eoxslt + end + result = xslt.apply_to(doc, ["title", "foo"]) + assert_no_match(//, result) + + # the entity-form is for systems with this bug with Encodings.properties + # https://issues.apache.org/jira/browse/XALANJ-2618 + # a.k.a. "Attempt to output character of integral value 48 that is not represented in specified output encoding of iso-8859-1." + assert_match( + /This is an adjacent|This is an adjacent/, result + ) end - result = xslt.apply_to(@doc, ['title', 'foo']) - assert_no_match(//, result) - - # the entity-form is for systems with this bug with Encodings.properties - # https://issues.apache.org/jira/browse/XALANJ-2618 - # a.k.a. "Attempt to output character of integral value 48 that is not represented in specified output encoding of iso-8859-1." - assert_match( - /This is an adjacent|This is an adjacent/, result - ) - end - def test_transform_arg_error - assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) - assert_raises(TypeError) do - style.transform(@doc, :foo) + def test_transform_arg_error + assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) + assert_raises(TypeError) do + style.transform(doc, :foo) + end end - end - def test_transform_with_hash - assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) - result = style.transform(@doc, { 'title' => '"Booyah"' }) - assert(result.html?) - assert_equal("Booyah", result.at_css("h1").content) - end + def test_transform_with_hash + assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) + result = style.transform(doc, { "title" => '"Booyah"' }) + assert(result.html?) + assert_equal("Booyah", result.at_css("h1").content) + end - def test_transform2 - assert(style = Nokogiri::XSLT(File.open(XSLT_FILE))) - assert(result_doc = style.transform(@doc)) - assert(result_doc.html?) - assert_equal("", result_doc.at_css("h1").content) + def test_transform2 + assert(style = Nokogiri::XSLT(File.open(XSLT_FILE))) + assert(result_doc = style.transform(doc)) + assert(result_doc.html?) + assert_equal("", result_doc.at_css("h1").content) - assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) - assert(result_doc = style.transform(@doc, ['title', '"Booyah"'])) - assert(result_doc.html?) - assert_equal("Booyah", result_doc.at_css("h1").content) + assert(style = Nokogiri::XSLT(File.read(XSLT_FILE))) + assert(result_doc = style.transform(doc, ["title", '"Booyah"'])) + assert(result_doc.html?) + assert_equal("Booyah", result_doc.at_css("h1").content) - assert(result_string = style.apply_to(@doc, ['title', '"Booyah"'])) - assert_equal(result_string, style.serialize(result_doc)) - end + assert(result_string = style.apply_to(doc, ["title", '"Booyah"'])) + assert_equal(result_string, style.serialize(result_doc)) + end - def test_transform_with_quote_params - assert(style = Nokogiri::XSLT(File.open(XSLT_FILE))) - assert(result_doc = style.transform(@doc, Nokogiri::XSLT.quote_params(['title', 'Booyah']))) - assert(result_doc.html?) - assert_equal("Booyah", result_doc.at_css("h1").content) + def test_transform_with_quote_params + assert(style = Nokogiri::XSLT(File.open(XSLT_FILE))) + assert(result_doc = style.transform(doc, Nokogiri::XSLT.quote_params(["title", "Booyah"]))) + assert(result_doc.html?) + assert_equal("Booyah", result_doc.at_css("h1").content) - assert(style = Nokogiri::XSLT.parse(File.read(XSLT_FILE))) - assert(result_doc = style.transform(@doc, Nokogiri::XSLT.quote_params({ 'title' => 'Booyah' }))) - assert(result_doc.html?) - assert_equal("Booyah", result_doc.at_css("h1").content) - end + assert(style = Nokogiri::XSLT.parse(File.read(XSLT_FILE))) + assert(result_doc = style.transform(doc, Nokogiri::XSLT.quote_params({ "title" => "Booyah" }))) + assert(result_doc.html?) + assert_equal("Booyah", result_doc.at_css("h1").content) + end - def test_quote_params - h = { - :sym => %{xxx}, - 'str' => %{"xxx"}, - :sym2 => %{'xxx'}, - 'str2' => %{x'x'x}, - :sym3 => %{x"x"x}, - } - hh = h.dup - result_hash = Nokogiri::XSLT.quote_params(h) - assert_equal(hh, h) # non-destructive - - a = h.to_a.flatten - result_array = Nokogiri::XSLT.quote_params(a) - assert_equal(h.to_a.flatten, a) # non-destructive - - assert_equal(result_array, result_hash) - end + def test_quote_params + h = { + :sym => %{xxx}, + "str" => %{"xxx"}, + :sym2 => %{'xxx'}, + "str2" => %{x'x'x}, + :sym3 => %{x"x"x}, + } + hh = h.dup + result_hash = Nokogiri::XSLT.quote_params(h) + assert_equal(hh, h) # non-destructive + + a = h.to_a.flatten + result_array = Nokogiri::XSLT.quote_params(a) + assert_equal(h.to_a.flatten, a) # non-destructive + + assert_equal(result_array, result_hash) + end - def test_exslt - # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") - skip_unless_libxml2("cannot get it working on JRuby") - - assert(doc = Nokogiri::XML.parse(File.read(EXML_FILE))) - assert(doc.xml?) - - assert(style = Nokogiri::XSLT.parse(File.read(EXSLT_FILE))) - params = { - p1: 'xxx', - p2: "x'x'x", - p3: 'x"x"x', - p4: '"xxx"', - } - result_doc = Nokogiri::XML.parse(style.apply_to(doc, + def test_exslt + # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") + skip_unless_libxml2("cannot get it working on JRuby") + + assert(doc = Nokogiri::XML.parse(File.read(EXML_FILE))) + assert(doc.xml?) + + assert(style = Nokogiri::XSLT.parse(File.read(EXSLT_FILE))) + params = { + p1: "xxx", + p2: "x'x'x", + p3: 'x"x"x', + p4: '"xxx"', + } + result_doc = Nokogiri::XML.parse(style.apply_to(doc, Nokogiri::XSLT.quote_params(params))) - assert_equal('func-result', result_doc.at('/root/function').content) - assert_equal(3, result_doc.at('/root/max').content.to_i) - assert_match( - /\d{4}-\d\d-\d\d([-|+]\d\d:\d\d)?/, - result_doc.at('/root/date').content - ) - result_doc.xpath('/root/params/*').each do |p| - assert_equal(p.content, params[p.name.intern]) - end - check_params(result_doc, params) - result_doc = Nokogiri::XML.parse(style.apply_to(doc, + assert_equal("func-result", result_doc.at("/root/function").content) + assert_equal(3, result_doc.at("/root/max").content.to_i) + assert_match( + /\d{4}-\d\d-\d\d([-|+]\d\d:\d\d)?/, + result_doc.at("/root/date").content + ) + result_doc.xpath("/root/params/*").each do |p| + assert_equal(p.content, params[p.name.intern]) + end + check_params(result_doc, params) + result_doc = Nokogiri::XML.parse(style.apply_to(doc, Nokogiri::XSLT.quote_params(params.to_a.flatten))) - check_params(result_doc, params) - end + check_params(result_doc, params) + end - def test_xslt_paramaters - # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") - skip_unless_libxml2("cannot get it working on JRuby") + def test_xslt_paramaters + # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") + skip_unless_libxml2("cannot get it working on JRuby") - xslt_str = <<~EOX - - - - - + xslt_str = <<~EOX + + + + + EOX - xslt = Nokogiri::XSLT(xslt_str) - doc = Nokogiri::XML("") - assert_match(/bar/, xslt.transform(doc, Nokogiri::XSLT.quote_params('foo' => 'bar')).to_s) - end + xslt = Nokogiri::XSLT(xslt_str) + doc = Nokogiri::XML("") + assert_match(/bar/, xslt.transform(doc, Nokogiri::XSLT.quote_params("foo" => "bar")).to_s) + end - def test_xslt_transform_error - # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") - skip_unless_libxml2("cannot get it working on JRuby") + def test_xslt_transform_error + # see http://yokolet.blogspot.com/2010/10/pure-java-nokogiri-xslt-extension.html") + skip_unless_libxml2("cannot get it working on JRuby") - xslt_str = <<~EOX - - - - - + xslt_str = <<~EOX + + + + + EOX - xslt = Nokogiri::XSLT(xslt_str) - doc = Nokogiri::XML("") - assert_raises(RuntimeError) { xslt.transform(doc) } - end + xslt = Nokogiri::XSLT(xslt_str) + doc = Nokogiri::XML("") + assert_raises(RuntimeError) { xslt.transform(doc) } + end - def test_xslt_parse_error - xslt_str = <<~EOX - - - - - - - - - - - - } - EOX - assert_raises(RuntimeError) { Nokogiri::XSLT.parse(xslt_str) } - end + def test_xslt_parse_error + xslt_str = <<~EOX + + + + + + + + + + + + } + EOX + assert_raises(RuntimeError) { Nokogiri::XSLT.parse(xslt_str) } + end - def test_passing_a_non_document_to_transform - xsl = Nokogiri::XSLT('') - assert_raises(ArgumentError) { xsl.transform("
") } - assert_raises(ArgumentError) { xsl.transform(Nokogiri::HTML("").css("body")) } - end + def test_passing_a_non_document_to_transform + xsl = Nokogiri::XSLT('') + assert_raises(ArgumentError) { xsl.transform("
") } + assert_raises(ArgumentError) { xsl.transform(Nokogiri::HTML("").css("body")) } + end - def check_params(result_doc, params) - result_doc.xpath('/root/params/*').each do |p| - assert_equal(p.content, params[p.name.intern]) + def check_params(result_doc, params) + result_doc.xpath("/root/params/*").each do |p| + assert_equal(p.content, params[p.name.intern]) + end end - end - def test_non_html_xslt_transform - xml = Nokogiri.XML(<<~EOXML) - - - 123 - - - EOXML - - xsl = Nokogiri.XSLT(<<~EOXSL) - - - - - - - - - EOXSL - - result = xsl.transform(xml) - assert(!result.html?) - end + def test_non_html_xslt_transform + xml = Nokogiri.XML(<<~EOXML) + + + 123 + + + EOXML - it "should not crash when given XPath 2.0 features" do - # - # https://github.com/sparklemotion/nokogiri/issues/1802 - # - # note that here the XPath 2.0 feature is `decimal`. - # this test case is taken from the example provided in the original issue. - # - xml = <<~EOXML - - - - 48.00 - - + xsl = Nokogiri.XSLT(<<~EOXSL) + + + + + + + + + EOXSL + + result = xsl.transform(xml) + assert(!result.html?) + end + + it "should not crash when given XPath 2.0 features" do + # + # https://github.com/sparklemotion/nokogiri/issues/1802 + # + # note that here the XPath 2.0 feature is `decimal`. + # this test case is taken from the example provided in the original issue. + # + xml = <<~EOXML + + + + 48.00 + + EOXML - xsl = <<~EOXSL - - - - - - - - - - - + xsl = <<~EOXSL + + + + + + + + + + + EOXSL - doc = Nokogiri::XML(xml) - xslt = Nokogiri::XSLT(xsl) - exception = assert_raise(RuntimeError) do - xslt.transform(doc) + doc = Nokogiri::XML(xml) + xslt = Nokogiri::XSLT(xsl) + exception = assert_raise(RuntimeError) do + xslt.transform(doc) + end + assert_match(/decimal/, exception.message) + end + + describe "DEFAULT_XSLT parse options" do + it "is the union of DEFAULT_XML and libxslt's XSLT_PARSE_OPTIONS" do + xslt_parse_options = Nokogiri::XML::ParseOptions.new.noent.dtdload.dtdattr.nocdata + expected = Nokogiri::XML::ParseOptions::DEFAULT_XML | xslt_parse_options.options + assert_equal(expected, Nokogiri::XML::ParseOptions::DEFAULT_XSLT) + end + + it "parses docs the same as xsltproc" do + skip_unless_libxml2("JRuby implementation disallows this edge case XSLT") + + # see https://github.com/sparklemotion/nokogiri/issues/1940 + xml = "" + xsl = <<~EOF + + + + + ]]> + + + EOF + + doc = Nokogiri::XML(xml) + stylesheet = Nokogiri::XSLT(xsl) + + # TODO: ideally I'd like to be able to access the parse options in the final object + # assert_equal(Nokogiri::XML::ParseOptions::DEFAULT_XSLT, stylesheet.document.parse_options) + + result = stylesheet.transform(doc) + assert_equal("<>", result.children.to_xml) + end end - assert_match(/decimal/, exception.message) end end