diff --git a/CHANGELOG.md b/CHANGELOG.md index 003a08d643..c631eaf619 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -98,6 +98,7 @@ This release ends support for: * [CRuby] Fixed installation on AIX with respect to `vasprintf`. [[#1908](https://github.com/sparklemotion/nokogiri/issues/1908)] * [JRuby] Standardize reading from IO like objects, including StringIO. [[#1888](https://github.com/sparklemotion/nokogiri/issues/1888), [#1897](https://github.com/sparklemotion/nokogiri/issues/1897)] * [Windows Visual C++] Fixed compiler warnings and errors. [[#2061](https://github.com/sparklemotion/nokogiri/issues/2061), [#2068](https://github.com/sparklemotion/nokogiri/issues/2068)] +* [JRuby] Fixed document encoding regression in v1.11.0 release candidates. [[#2080](https://github.com/sparklemotion/nokogiri/issues/2080), [#2083](https://github.com/sparklemotion/nokogiri/issues/2083)] (Thanks, [@thbar](https://github.com/thbar)!) ### Removed diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 45853a71ed..1a7e48459c 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -759,15 +759,13 @@ public IRubyObject in_context(ThreadContext context, IRubyObject str, IRubyObjec klass = getNokogiriClass(runtime, "Nokogiri::HTML::Document"); ctx = new HtmlDomParserContext(runtime, options); ((HtmlDomParserContext) ctx).enableDocumentFragment(); - istream = new ByteArrayInputStream((rubyStringToString(str)).getBytes()); + ctx.setStringInputSource(context, str, context.nil); } else { klass = getNokogiriClass(runtime, "Nokogiri::XML::Document"); ctx = new XmlDomParserContext(runtime, options); - String input = rubyStringToString(str); - istream = new ByteArrayInputStream(input.getBytes()); + ctx.setStringInputSource(context, str, context.nil); } - ctx.setInputSource(istream); // TODO: for some reason, document.getEncoding() can be null or nil (don't know why) // run `test_parse_with_unparented_html_text_context_node' few times to see this happen if (document instanceof HtmlDocument && !(document.getEncoding() == null || document.getEncoding().isNil())) { diff --git a/ext/java/nokogiri/internals/HtmlDomParserContext.java b/ext/java/nokogiri/internals/HtmlDomParserContext.java index 52955b5d13..cafcee7c54 100644 --- a/ext/java/nokogiri/internals/HtmlDomParserContext.java +++ b/ext/java/nokogiri/internals/HtmlDomParserContext.java @@ -66,11 +66,12 @@ public class HtmlDomParserContext extends XmlDomParserContext { public HtmlDomParserContext(Ruby runtime, IRubyObject options) { - super(runtime, options); + this(runtime, runtime.getNil(), options); } - + public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) { super(runtime, encoding, options); + java_encoding = NokogiriHelpers.getValidEncoding(encoding); } @Override diff --git a/ext/java/nokogiri/internals/XmlDomParserContext.java b/ext/java/nokogiri/internals/XmlDomParserContext.java index 3a75d72f6f..d400faae0c 100644 --- a/ext/java/nokogiri/internals/XmlDomParserContext.java +++ b/ext/java/nokogiri/internals/XmlDomParserContext.java @@ -93,7 +93,7 @@ public XmlDomParserContext(Ruby runtime, IRubyObject options) { public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) { super(runtime); this.options = new ParserContext.Options(RubyFixnum.fix2long(options)); - java_encoding = NokogiriHelpers.getValidEncoding(encoding); + java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding); ruby_encoding = encoding; initErrorHandler(); initParser(runtime); diff --git a/test/files/iso-8859-1.xml b/test/files/iso-8859-1.xml new file mode 100644 index 0000000000..d714a0989b --- /dev/null +++ b/test/files/iso-8859-1.xml @@ -0,0 +1,5 @@ + + + Accepté + Something + diff --git a/test/test_iso.rb b/test/test_iso.rb new file mode 100644 index 0000000000..0f3f64a6e0 --- /dev/null +++ b/test/test_iso.rb @@ -0,0 +1,15 @@ +require "helper" + +class TestISO < Nokogiri::TestCase + def test_iso_content_not_lacking_accents + data = IO.binread('test/files/iso-8859-1.xml') + document = Nokogiri::XML(data) + assert_equal "Accepté", document.at('DATA').text + end + + def test_iso_content_not_truncated + data = IO.binread('test/files/iso-8859-1.xml') + document = Nokogiri::XML(data) + assert_equal 2, document.search('DATA').count + end +end