/
test_push_parser.rb
87 lines (76 loc) · 2.48 KB
/
test_push_parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
# frozen_string_literal: true
require "helper"
module Nokogiri
  module HTML
    module SAX
      # Tests for HTML::SAX::PushParser: markup is pushed in one or more chunks
      # with #<< and the events collected on @parser.document are inspected.
      class TestPushParser < Nokogiri::SAX::TestCase
        # Gives every test a fresh push parser built around a new Doc.
        def setup
          super
          @parser = HTML::SAX::PushParser.new(Doc.new)
        end

        # The end-of-document flag is unset after pushing a chunk and is set
        # once #finish has been called.
        def test_end_document_called
          @parser.<<(<<~eoxml)
            <p id="asdfasdf">
            <!-- This is a comment -->
            Paragraph 1
            </p>
          eoxml

          refute(@parser.document.end_document_called)
          @parser.finish
          assert(@parser.document.end_document_called)
        end

        # Start-element events from the first chunk are checked before the
        # remainder of the document is pushed; the comment from the second
        # chunk is checked before #finish.
        def test_start_element
          @parser.<<(<<~eoxml)
            <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
            <html><head><body><p id="asdfasdf">
          eoxml

          assert_equal(
            [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
            @parser.document.start_elements,
          )

          @parser.<<(<<~eoxml)
            <!-- This is a comment -->
            Paragraph 1
            </p></body></html>
          eoxml

          assert_equal([" This is a comment "], @parser.document.comments)
          @parser.finish
        end

        # A fragment split across two pushes: the comment in the second chunk
        # is checked before #finish is called.
        def test_chevron_partial_html
          @parser.<<(<<~eoxml)
            <p id="asdfasdf">
          eoxml

          @parser.<<(<<~eoxml)
            <!-- This is a comment -->
            Paragraph 1
            </p>
          eoxml

          assert_equal([" This is a comment "], @parser.document.comments)
          @parser.finish
        end

        # A fragment pushed in a single chunk: the comment is checked after
        # #finish.
        def test_chevron
          @parser.<<(<<~eoxml)
            <p id="asdfasdf">
            <!-- This is a comment -->
            Paragraph 1
            </p>
          eoxml

          @parser.finish
          assert_equal([" This is a comment "], @parser.document.comments)
        end

        # A freshly constructed parser reports 0 as its options.
        def test_default_options
          assert_equal(0, @parser.options)
        end

        # Pushes a document whose XML declaration says UTF-8 but whose text
        # contains the byte \337, with the RECOVER option enabled. Expects at
        # least one recorded error, the character data unchanged, and end
        # events for r, body and html.
        def test_broken_encoding
          skip_unless_libxml2("ultra hard to fix for pure Java version")

          @parser.options |= XML::ParseOptions::RECOVER
          # This is ISO_8859-1:
          @parser.<<("<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>")
          @parser.finish

          assert_operator(@parser.document.errors.size, :>=, 1)
          assert_equal("Gau\337", @parser.document.data.join)
          assert_equal([["r"], ["body"], ["html"]], @parser.document.end_elements)
        end
      end
    end
  end
end