forked from premailer/premailer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nokogumbo.rb
259 lines (222 loc) · 9.51 KB
/
nokogumbo.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
class Premailer
module Adapter
# Nokogiri adapter
module Nokogumbo
include AdapterHelper::RgbToHex
# Merge CSS into the HTML document.
#
# @return [String] an HTML.
def to_inline_css
doc = @processed_doc
@unmergable_rules = CssParser::Parser.new
# Give all styles already in style attributes a specificity of 1000
# per http://www.w3.org/TR/CSS21/cascade.html#specificity
doc.search("*[@style]").each do |el|
el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
end
# Iterate through the rules and merge them into the HTML
@css_parser.each_selector(:all) do |selector, declaration, specificity, media_types|
# Save un-mergable rules separately
selector.gsub!(/:link([\s]*)+/i) { |m| $1 }
# Convert element names to lower case
selector.gsub!(/([\s]|^)([\w]+)/) { |m| $1.to_s + $2.to_s.downcase }
if Premailer.is_media_query?(media_types) || selector =~ Premailer::RE_UNMERGABLE_SELECTORS
@unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration), media_types) unless @options[:preserve_styles]
else
begin
if selector =~ Premailer::RE_RESET_SELECTORS
# this is in place to preserve the MailChimp CSS reset: http://github.com/mailchimp/Email-Blueprints/
# however, this doesn't mean for testing pur
@unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless !@options[:preserve_reset]
end
# Change single ID CSS selectors into xpath so that we can match more
# than one element. Added to work around dodgy generated code.
selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')
doc.search(selector).each do |el|
if el.elem? and (el.name != 'head' and el.parent.name != 'head')
# Add a style attribute or append to the existing one
block = "[SPEC=#{specificity}[#{declaration}]]"
el['style'] = (el.attributes['style'].to_s ||= '') + ' ' + block
end
end
rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
$stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
next
end
end
end
# Remove script tags
doc.search("script").remove if @options[:remove_scripts]
# Read STYLE attributes and perform folding
doc.search("*[@style]").each do |el|
style = el.attributes['style'].to_s
declarations = []
style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
rs = CssParser::RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
declarations << rs
end
# Perform style folding
merged = CssParser.merge(declarations)
merged.expand_shorthand!
# Duplicate CSS attributes as HTML attributes
if Premailer::RELATED_ATTRIBUTES.has_key?(el.name) && @options[:css_to_attributes]
Premailer::RELATED_ATTRIBUTES[el.name].each do |css_attr, html_attr|
if el[html_attr].nil? and not merged[css_attr].empty?
new_val = merged[css_attr].dup
# Remove url() function wrapper
new_val.gsub!(/url\((['"])(.*?)\1\)/, '\2')
# Remove !important, trailing semi-colon, and leading/trailing whitespace
new_val.gsub!(/;$|\s*!important/, '').strip!
# For width and height tags, remove px units
new_val.gsub!(/(\d+)px/, '\1') if %w[width height].include?(html_attr)
# For color-related tags, convert RGB to hex if specified by options
new_val = ensure_hex(new_val) if css_attr.end_with?('color') && @options[:rgb_to_hex_attributes]
el[html_attr] = new_val
end
unless @options[:preserve_style_attribute]
merged.instance_variable_get("@declarations").tap do |declarations|
declarations.delete(css_attr)
end
end
end
end
# Collapse multiple rules into one as much as possible.
merged.create_shorthand! if @options[:create_shorthands]
# write the inline STYLE attribute
el['style'] = merged.declarations_to_s
end
doc = write_unmergable_css_rules(doc, @unmergable_rules) unless @options[:drop_unmergeable_css_rules]
if @options[:remove_classes] or @options[:remove_comments]
doc.traverse do |el|
if el.comment? and @options[:remove_comments]
el.remove
elsif el.element?
el.remove_attribute('class') if @options[:remove_classes]
end
end
end
if @options[:remove_ids]
# find all anchor's targets and hash them
targets = []
doc.search("a[@href^='#']").each do |el|
target = el.get_attribute('href')[1..-1]
targets << target
el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
end
# hash ids that are links target, delete others
doc.search("*[@id]").each do |el|
id = el.get_attribute('id')
if targets.include?(id)
el.set_attribute('id', Digest::MD5.hexdigest(id))
else
el.remove_attribute('id')
end
end
end
if @options[:reset_contenteditable]
doc.search('*[@contenteditable]').each do |el|
el.remove_attribute('contenteditable')
end
end
@processed_doc = doc
if is_xhtml?
# we don't want to encode carriage returns
@processed_doc.to_xhtml(:encoding => @options[:output_encoding]).gsub(/&\#(xD|13);/i, "\r")
else
@processed_doc.to_html(:encoding => @options[:output_encoding])
end
end
# Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
# and write it into the <tt>head</tt>.
#
# <tt>doc</tt> is an Nokogiri document and <tt>unmergable_css_rules</tt> is a Css::RuleSet.
#
# @return [::Nokogiri::XML] a document.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
styles = unmergable_rules.to_s
unless styles.empty?
if @options[:html_fragment]
style_tag = ::Nokogiri::XML::Node.new("style", doc)
style_tag.content = styles
doc.add_child(style_tag)
else
style_tag = doc.create_element "style", styles
head = doc.at_css('head')
head ||= doc.root.first_element_child.add_previous_sibling(doc.create_element "head") if doc.root && doc.root.first_element_child
head ||= doc.add_child(doc.create_element "head")
head << style_tag
end
end
doc
end
# Converts the HTML document to a format suitable for plain-text e-mail.
#
# If present, uses the <body> element as its base; otherwise uses the whole document.
#
# @return [String] a plain text.
def to_plain_text
html_src = ''
begin
html_src = @doc.at("body").inner_html
rescue;
end
html_src = @doc.to_html unless html_src and not html_src.empty?
convert_to_text(html_src, @options[:line_length], @html_encoding)
end
# Gets the original HTML as a string.
# @return [String] HTML.
def to_s
if is_xhtml?
@doc.to_xhtml(:encoding => nil)
else
@doc.to_html(:encoding => nil)
end
end
# Load the HTML file and convert it into an Nokogiri document.
#
# @return [::Nokogiri::XML] a document.
def load_html(input) # :nodoc:
thing = nil
# TODO: duplicate options
if @options[:with_html_string] or @options[:inline] or input.respond_to?(:read)
thing = input
elsif @is_local_file
@base_dir = File.dirname(input)
thing = File.open(input, 'r')
else
thing = URI.open(input)
end
if thing.respond_to?(:read)
thing = thing.read
end
return nil unless thing
doc = nil
# Handle HTML entities
if @options[:replace_html_entities] == true and thing.is_a?(String)
HTML_ENTITIES.map do |entity, replacement|
thing.gsub! entity, replacement
end
end
# Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
# However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
if thing.is_a?(String) and RUBY_VERSION =~ /1.9/
thing = thing.force_encoding(@options[:input_encoding]).encode!
end
doc = if @options[:html_fragment]
::Nokogiri::HTML5.fragment(thing)
else
::Nokogiri::HTML5(thing)
end
# Fix for removing any CDATA tags from both style and script tags inserted per
# https://github.com/sparklemotion/nokogiri/issues/311 and
# https://github.com/premailer/premailer/issues/199
%w(style script).each do |tag|
doc.search(tag).children.each do |child|
child.swap(child.text()) if child.cdata?
end
end
doc
end
end
end
end