/
parse_options.rb
131 lines (121 loc) · 4.65 KB
/
parse_options.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# frozen_string_literal: true
module Nokogiri
  module XML
    ###
    # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
    #
    # == Building combinations of parse options
    # You can build your own combinations of these parse options by using any of the following methods:
    # *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
    # [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
    #   Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
    # [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
    #   Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
    # [Using Ruby Blocks] You can also setup parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
    #   Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
    #
    # == Removing particular parse options
    # You can also remove options from an instance of +ParseOptions+ dynamically.
    # Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
    # Note that this is not available for +STRICT+.
    #
    #   # Setting the RECOVER & NOENT options...
    #   options = Nokogiri::XML::ParseOptions.new.recover.noent
    #   # later...
    #   options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
    #   options.nonoent   # Removes the Nokogiri::XML::ParseOptions::NOENT option
    #
    class ParseOptions
      # Strict parsing
      STRICT = 0
      # Recover from errors
      RECOVER = 1 << 0
      # Substitute entities
      NOENT = 1 << 1
      # Load external subsets
      DTDLOAD = 1 << 2
      # Default DTD attributes
      DTDATTR = 1 << 3
      # validate with the DTD
      DTDVALID = 1 << 4
      # suppress error reports
      NOERROR = 1 << 5
      # suppress warning reports
      NOWARNING = 1 << 6
      # pedantic error reporting
      PEDANTIC = 1 << 7
      # remove blank nodes
      NOBLANKS = 1 << 8
      # use the SAX1 interface internally
      SAX1 = 1 << 9
      # Implement XInclude substitution
      XINCLUDE = 1 << 10
      # Forbid network access. Recommended for dealing with untrusted documents.
      NONET = 1 << 11
      # Do not reuse the context dictionary
      NODICT = 1 << 12
      # remove redundant namespaces declarations
      NSCLEAN = 1 << 13
      # merge CDATA as text nodes
      NOCDATA = 1 << 14
      # do not generate XINCLUDE START/END nodes
      NOXINCNODE = 1 << 15
      # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
      COMPACT = 1 << 16
      # parse using XML-1.0 before update 5
      OLD10 = 1 << 17
      # do not fixup XINCLUDE xml:base uris
      NOBASEFIX = 1 << 18
      # relax any hardcoded limit from the parser
      HUGE = 1 << 19
      # line numbers stored as long int (instead of a short int)
      BIG_LINES = 1 << 22

      # the default options used for parsing XML documents
      DEFAULT_XML = RECOVER | NONET | BIG_LINES
      # the default options used for parsing XSLT stylesheets
      DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA | BIG_LINES
      # the default options used for parsing HTML documents
      DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET | BIG_LINES
      # the default options used for parsing XML schemas
      DEFAULT_SCHEMA = NONET | BIG_LINES

      # The integer bitmask holding the currently selected options.
      attr_accessor :options

      # Creates a new set of parse options.
      #
      # +options+ is the initial integer bitmask; it defaults to +STRICT+ (0),
      # i.e. no option bits set.
      def initialize(options = STRICT)
        @options = options
      end

      # For every constant except +STRICT+ (including the composite
      # +DEFAULT_*+ masks) define three instance methods, named after the
      # lowercased constant:
      #
      # [<code>name</code>]   sets the constant's bits and returns +self+ so calls can be chained
      # [<code>noname</code>] clears the constant's bits and returns +self+
      # [<code>name?</code>]  true when *all* of the constant's bits are set
      #
      # +STRICT+ is skipped because its value is 0, so there are no bits to
      # set or clear; #strict and #strict? are written by hand below.
      constants.each do |constant|
        next if constant.to_sym == :STRICT

        flag = const_get(constant)
        name = constant.downcase

        define_method(name) do
          @options |= flag
          self
        end

        define_method(:"no#{name}") do
          @options &= ~flag
          self
        end

        define_method(:"#{name}?") do
          flag & @options == flag
        end
      end

      # Switches to strict parsing by clearing the +RECOVER+ bit.
      # Returns +self+ so calls can be chained.
      def strict
        @options &= ~RECOVER
        self
      end

      # Returns true when the +RECOVER+ bit is not set.
      def strict?
        @options & RECOVER == STRICT
      end

      # Compares by integer bitmask: +other+ may be another ParseOptions or
      # anything else that responds to +to_i+ (for example an Integer).
      #
      # Objects that cannot be converted with +to_i+ are simply not equal;
      # the guard keeps equality checks (and callers such as Array#include?)
      # from raising NoMethodError.
      def ==(other)
        other.respond_to?(:to_i) && other.to_i == to_i
      end

      # The options bitmask as an Integer (same value as #options).
      alias_method :to_i, :options

      # Returns the default +inspect+ string with the lowercased names of all
      # constants whose predicate is currently true appended before the
      # closing ">".
      def inspect
        enabled = self.class.constants.map(&:downcase).select { |name| send(:"#{name}?") }
        super.sub(/>$/, " #{enabled.join(', ')}>")
      end
    end
  end
end