Skip to content

Commit

Permalink
Merge pull request #2083 from sparklemotion/repro-iso-jruby
Browse files Browse the repository at this point in the history
Fix the tests in #2080
  • Loading branch information
flavorjones committed Oct 13, 2020
2 parents 992fad9 + 9cf1b35 commit d27cb96
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -98,6 +98,7 @@ This release ends support for:
* [CRuby] Fixed installation on AIX with respect to `vasprintf`. [[#1908](https://github.com/sparklemotion/nokogiri/issues/1908)]
* [JRuby] Standardize reading from IO like objects, including StringIO. [[#1888](https://github.com/sparklemotion/nokogiri/issues/1888), [#1897](https://github.com/sparklemotion/nokogiri/issues/1897)]
* [Windows Visual C++] Fixed compiler warnings and errors. [[#2061](https://github.com/sparklemotion/nokogiri/issues/2061), [#2068](https://github.com/sparklemotion/nokogiri/issues/2068)]
* [JRuby] Fixed document encoding regression in v1.11.0 release candidates. [[#2080](https://github.com/sparklemotion/nokogiri/issues/2080), [#2083](https://github.com/sparklemotion/nokogiri/issues/2083)] (Thanks, [@thbar](https://github.com/thbar)!)


### Removed
Expand Down
6 changes: 2 additions & 4 deletions ext/java/nokogiri/XmlNode.java
Expand Up @@ -759,15 +759,13 @@ public IRubyObject in_context(ThreadContext context, IRubyObject str, IRubyObjec
klass = getNokogiriClass(runtime, "Nokogiri::HTML::Document");
ctx = new HtmlDomParserContext(runtime, options);
((HtmlDomParserContext) ctx).enableDocumentFragment();
istream = new ByteArrayInputStream((rubyStringToString(str)).getBytes());
ctx.setStringInputSource(context, str, context.nil);
} else {
klass = getNokogiriClass(runtime, "Nokogiri::XML::Document");
ctx = new XmlDomParserContext(runtime, options);
String input = rubyStringToString(str);
istream = new ByteArrayInputStream(input.getBytes());
ctx.setStringInputSource(context, str, context.nil);
}

ctx.setInputSource(istream);
// TODO: for some reason, document.getEncoding() can be null or nil (don't know why)
// run `test_parse_with_unparented_html_text_context_node' a few times to see this happen
if (document instanceof HtmlDocument && !(document.getEncoding() == null || document.getEncoding().isNil())) {
Expand Down
5 changes: 3 additions & 2 deletions ext/java/nokogiri/internals/HtmlDomParserContext.java
Expand Up @@ -66,11 +66,12 @@
public class HtmlDomParserContext extends XmlDomParserContext {

public HtmlDomParserContext(Ruby runtime, IRubyObject options) {
super(runtime, options);
this(runtime, runtime.getNil(), options);
}

public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
super(runtime, encoding, options);
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion ext/java/nokogiri/internals/XmlDomParserContext.java
Expand Up @@ -93,7 +93,7 @@ public XmlDomParserContext(Ruby runtime, IRubyObject options) {
public XmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) {
super(runtime);
this.options = new ParserContext.Options(RubyFixnum.fix2long(options));
java_encoding = NokogiriHelpers.getValidEncoding(encoding);
java_encoding = NokogiriHelpers.getValidEncodingOrNull(encoding);
ruby_encoding = encoding;
initErrorHandler();
initParser(runtime);
Expand Down
5 changes: 5 additions & 0 deletions test/files/iso-8859-1.xml
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<FEED>
<DATA>Accepté</DATA>
<DATA>Something</DATA>
</FEED>
15 changes: 15 additions & 0 deletions test/test_iso.rb
@@ -0,0 +1,15 @@
require "helper"

# Regression tests for parsing an ISO-8859-1 encoded XML document from a
# binary string: accented characters must be decoded correctly and the
# document must not be cut short at the first non-ASCII byte.
class TestISO < Nokogiri::TestCase
  # Absolute path to the fixture, resolved relative to this test file so the
  # tests do not depend on the directory the suite is launched from.
  ISO_8859_1_FIXTURE = File.expand_path("files/iso-8859-1.xml", __dir__)

  # The first DATA element contains an accented character that must survive
  # the ISO-8859-1 decoding.
  def test_iso_content_not_lacking_accents
    assert_equal "Accepté", parse_iso_fixture.at('DATA').text
  end

  # Both DATA elements must be present, i.e. parsing must continue past the
  # accented character in the first one.
  def test_iso_content_not_truncated
    assert_equal 2, parse_iso_fixture.search('DATA').count
  end

  private

  # Reads the fixture as raw bytes (no transcoding by Ruby) and parses it,
  # leaving encoding detection to the parser via the XML declaration.
  #
  # Returns the document produced by Nokogiri::XML.
  def parse_iso_fixture
    Nokogiri::XML(File.binread(ISO_8859_1_FIXTURE))
  end
end

0 comments on commit d27cb96

Please sign in to comment.