diff --git a/lib/nokogiri/html4/document.rb b/lib/nokogiri/html4/document.rb index 177efc04f7..fbc22d2072 100644 --- a/lib/nokogiri/html4/document.rb +++ b/lib/nokogiri/html4/document.rb @@ -268,7 +268,7 @@ def start_element(name, attrs = []) end def self.detect_encoding(chunk) - (m = chunk.match(/\A(<\?xml[ \t\r\n]+[^>]*>)/)) && + (m = chunk.match(/\A(<\?xml[ \t\r\n][^>]*>)/)) && (return Nokogiri.XML(m[1]).encoding) if Nokogiri.jruby? diff --git a/test/html4/test_document_encoding.rb b/test/html4/test_document_encoding.rb index 6115301764..ecb4aa9a12 100644 --- a/test/html4/test_document_encoding.rb +++ b/test/html4/test_document_encoding.rb @@ -155,6 +155,18 @@ def binopen(file) end end end + + it "does not start backtracking during detection of XHTML encoding" do + # this test is a quick and dirty version + # of the more complete perf test that is on main. + n = 40_000 + redos_string = "