/
parse_options.rb
129 lines (119 loc) · 4.51 KB
/
parse_options.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# frozen_string_literal: true
module Nokogiri
module XML
###
# Parse options for passing to Nokogiri.XML or Nokogiri.HTML
#
# == Building combinations of parse options
# You can build your own combinations of these parse options by using any of the following methods:
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
# Nokogiri.XML('<content>Chapter 1</content>', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
# Nokogiri.XML('<content>Chapter 1</content>', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
# [Using Ruby Blocks] You can also set up parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
# Nokogiri.XML('<content>Chapter 1</content>') {|config| config.recover.noent}
#
# == Removing particular parse options
# You can also remove options from an instance of +ParseOptions+ dynamically.
# Every option has an equivalent <code>no{option}</code> method in lowercase. You can call these methods on an instance of +ParseOptions+ to remove the option.
# Note that this is not available for +STRICT+.
#
# # Setting the RECOVER & NOENT options...
# options = Nokogiri::XML::ParseOptions.new.recover.noent
# # later...
# options.norecover # Removes the Nokogiri::XML::ParseOptions::RECOVER option
# options.nonoent # Removes the Nokogiri::XML::ParseOptions::NOENT option
#
class ParseOptions
  # Strict parsing
  STRICT = 0
  # Recover from errors
  RECOVER = 1 << 0
  # Substitute entities
  NOENT = 1 << 1
  # Load external subsets
  DTDLOAD = 1 << 2
  # Default DTD attributes
  DTDATTR = 1 << 3
  # validate with the DTD
  DTDVALID = 1 << 4
  # suppress error reports
  NOERROR = 1 << 5
  # suppress warning reports
  NOWARNING = 1 << 6
  # pedantic error reporting
  PEDANTIC = 1 << 7
  # remove blank nodes
  NOBLANKS = 1 << 8
  # use the SAX1 interface internally
  SAX1 = 1 << 9
  # Implement XInclude substitution
  XINCLUDE = 1 << 10
  # Forbid network access. Recommended for dealing with untrusted documents.
  NONET = 1 << 11
  # Do not reuse the context dictionary
  NODICT = 1 << 12
  # remove redundant namespaces declarations
  NSCLEAN = 1 << 13
  # merge CDATA as text nodes
  NOCDATA = 1 << 14
  # do not generate XINCLUDE START/END nodes
  NOXINCNODE = 1 << 15
  # compact small text nodes; no modification of the tree allowed afterwards (will possibly crash if you try to modify the tree)
  COMPACT = 1 << 16
  # parse using XML-1.0 before update 5
  OLD10 = 1 << 17
  # do not fixup XINCLUDE xml:base uris
  NOBASEFIX = 1 << 18
  # relax any hardcoded limit from the parser
  HUGE = 1 << 19

  # the default options used for parsing XML documents
  DEFAULT_XML = RECOVER | NONET
  # the default options used for parsing XSLT stylesheets
  DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
  # the default options used for parsing HTML documents
  DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
  # the default options used for parsing XML schemas
  DEFAULT_SCHEMA = NONET

  # The Integer bitmask holding the currently enabled option flags.
  attr_accessor :options

  # Create a new set of parse options.
  #
  # [options] Integer bitmask of the flags to start with. Defaults to
  #           +STRICT+ (0, i.e. no flag set).
  def initialize(options = STRICT)
    @options = options
  end

  # For every constant defined above except +STRICT+ (composite DEFAULT_*
  # masks included), generate three instance methods named after the
  # lowercased constant:
  #
  # [<name>]   sets the constant's bits in +options+ and returns +self+
  # [no<name>] clears the constant's bits in +options+ and returns +self+
  # [<name>?]  true when all of the constant's bits are set in +options+
  #
  # __FILE__ and __LINE__ are passed so that backtraces and
  # Method#source_location point at this file rather than at "(eval)".
  constants.each do |constant|
    next if constant == :STRICT

    class_eval <<~RUBY, __FILE__, __LINE__ + 1
      def #{constant.downcase}
        @options |= #{constant}
        self
      end

      def no#{constant.downcase}
        @options &= ~#{constant}
        self
      end

      def #{constant.downcase}?
        (#{constant} & @options) == #{constant}
      end
    RUBY
  end

  # Switch to strict parsing by clearing the +RECOVER+ flag; every other
  # flag is left as it is. Returns +self+ so calls can be chained.
  def strict
    @options &= ~RECOVER
    self
  end

  # True when the +RECOVER+ flag is not set.
  def strict?
    (@options & RECOVER) == STRICT
  end

  # The options bitmask (same value as #options).
  alias to_i options

  # Two option sets are equal when their integer bitmasks are equal.
  # +other+ may be anything that responds to +to_i+ (another ParseOptions,
  # a plain Integer, ...). An object without +to_i+ is never equal; it
  # yields +false+ rather than raising NoMethodError.
  def ==(other)
    other.respond_to?(:to_i) && other.to_i == to_i
  end

  # The default Object#inspect output with the lowercased names of all
  # constants whose predicate currently answers true appended before the
  # closing ">".
  def inspect
    enabled = self.class.constants.map(&:downcase).select { |name| send(:"#{name}?") }
    super.sub(/>$/, " #{enabled.join(', ')}>")
  end
end
end
end