Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML parse processing instructions (backport to v1.13.x) #2509

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Binary file modified lib/nekohtml.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion lib/nokogiri/xml/node.rb
Expand Up @@ -123,7 +123,7 @@ class Node
# [Yields] Nokogiri::XML::Node
# [Returns] Nokogiri::XML::Node
#
def initialize(name, document)
def initialize(name, document) # rubocop:disable Style/RedundantInitialize
# This is intentionally empty.
end

Expand Down
2 changes: 1 addition & 1 deletion lib/nokogiri/xml/processing_instruction.rb
Expand Up @@ -3,7 +3,7 @@
module Nokogiri
module XML
class ProcessingInstruction < Node
def initialize(document, name, content)
def initialize(document, name, content) # rubocop:disable Style/RedundantInitialize
end
end
end
Expand Down
29 changes: 21 additions & 8 deletions test/html4/test_document.rb
Expand Up @@ -727,6 +727,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
assert_equal(0, doc.errors.length)
end

# Regression test with no assertions: repeatedly creates a new HTML document
# and removes its internal subset. Per the note below, it passes as long as
# the run does not raise a valgrind error.
def test_leaking_dtd_nodes_after_internal_subset_removal
# see https://github.com/sparklemotion/nokogiri/issues/1784
#
# just checking that this doesn't raise a valgrind error. we
# don't otherwise have any test coverage for removing DTDs.
#
# Each iteration builds a fresh document, so a new internal subset is removed every time.
100.times do |_i|
Nokogiri::HTML::Document.new.internal_subset.remove
end
end

it "skips encoding for script tags" do
html = Nokogiri::HTML(<<~EOHTML)
<html>
Expand Down Expand Up @@ -763,15 +774,17 @@ def test_silencing_nonparse_errors_during_attribute_insertion_1262
assert_equal "ISO-8859-1", html.encoding.name
end

def test_leaking_dtd_nodes_after_internal_subset_removal
# see https://github.com/sparklemotion/nokogiri/issues/1784
#
# just checking that this doesn't raise a valgrind error. we
# don't otherwise have any test coverage for removing DTDs.
#
100.times do |_i|
Nokogiri::HTML::Document.new.internal_subset.remove
# Parses a truncated HTML4 document containing `<!-->` followed by an
# unterminated `<?a/`, then checks the node types of <body>'s children.
# The expected list depends on which parser backend is in use.
it "handles ill-formed processing instructions" do
html = %{<html><body><!--><?a/}
doc = Nokogiri::HTML4::Document.parse(html)
expected = if Nokogiri.jruby?
# On JRuby: a comment node followed by a processing-instruction node.
[Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::PI_NODE]
elsif Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch")
# When the listed libxml2 patch is applied: only the comment node.
[Nokogiri::XML::Node::COMMENT_NODE]
else
# Otherwise: <body> is expected to have no children.
[]
end
assert_equal(expected, doc.at_css("body").children.map(&:type))
end

describe ".parse" do
Expand Down
2 changes: 1 addition & 1 deletion test/html4/test_element_description.rb
Expand Up @@ -43,7 +43,7 @@ def test_deprecated?
end

# Checks the inline? predicate on element descriptions looked up by tag name:
# expected to hold for "a" and "strong", and not to hold for "div".
def test_inline?
assert_predicate(ElementDescription["a"], :inline?)
assert_predicate(ElementDescription["strong"], :inline?)
refute_predicate(ElementDescription["div"], :inline?)
end

Expand Down
14 changes: 14 additions & 0 deletions test/html5/test_nokogumbo.rb
Expand Up @@ -322,6 +322,20 @@ def test_line_cdata
assert_equal(3, node.line)
end

# Parses the same truncated input with the HTML5 document parser and expects
# <body> to have exactly two children, both comment nodes (presumably one for
# `<!-->` and one for the ill-formed `<?a/` -- confirm against the parser).
it "handles ill-formed processing instructions in a document" do
html = %{<html><body><!--><?a/}
doc = Nokogiri::HTML5::Document.parse(html)
expected = [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE]
assert_equal(expected, doc.at_css("body").children.map(&:type))
end

# Fragment counterpart of the document test: parses `<div><!--><?a/` with the
# HTML5 fragment parser and expects the <div> to have exactly two children,
# both comment nodes.
it "handles ill-formed processing instructions in a fragment" do
html = %{<div><!--><?a/}
frag = Nokogiri::HTML5::DocumentFragment.parse(html)
expected = [Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::COMMENT_NODE]
assert_equal(expected, frag.at_css("div").children.map(&:type))
end

private

def buffer
Expand Down