/
xml-extractor.rb
143 lines (120 loc) · 3.41 KB
/
xml-extractor.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# encoding: UTF-8
require 'nokogiri'
require 'rosette/core'
module Rosette
module Extractors
class XmlExtractor < Rosette::Core::StaticExtractor
# Walks every translatable entry found in +xml_content+.
#
# With a block: for each entry produced by +each_entry+, yields the phrase
# built by +make_phrase+ from the entry's key and meta key, followed by the
# entry's line number.
# Without a block: returns an enumerator over the same (phrase, line number)
# pairs.
#
# +each_entry+ is not defined on this class within this file; subclasses
# such as AndroidExtractor supply it.
def extract_each_from(xml_content)
  return to_enum(__method__, xml_content) unless block_given?

  each_entry(xml_content) do |entry_key, entry_meta_key, entry_line|
    yield(make_phrase(entry_key, entry_meta_key), entry_line)
  end
end
# Tells callers whether yielded line numbers are meaningful.
# Always false for this extractor.
def supports_line_numbers?
  false
end
protected
# Parses +xml_content+ with Nokogiri and returns the result.
#
# The parser options are *assigned* (not OR-ed) to NONET, so NONET is the
# only option in effect and any Nokogiri defaults are dropped.
def parse(xml_content)
  Nokogiri::XML(xml_content) { |cfg| cfg.options = Nokogiri::XML::ParseOptions::NONET }
end
class AndroidExtractor < XmlExtractor
protected
# Parses +xml_content+ once and hands the caller's block to each of the
# three entry walkers in turn: plain strings, then string arrays, then
# plurals. The block receives (text, name, line number) for every entry.
def each_entry(xml_content, &block)
  document = parse(xml_content)

  each_string_entry(document, &block)
  each_array_entry(document, &block)
  each_plural_entry(document, &block)
end
# Yields (text, name, line number) for every node matched by the
# '//string' XPath query on +doc+.
def each_string_entry(doc)
  string_nodes = doc.xpath('//string')

  string_nodes.each do |string_node|
    yield text_from(string_node), name_from(string_node), line_number_from(string_node)
  end
end
# Yields (text, name, line number) for every 'item' child of every node
# matched by '//string-array' on +doc+.
#
# The yielded name is "<array name>.<zero-based item index>".
def each_array_entry(doc)
  doc.xpath('//string-array').each do |array_node|
    array_name = name_from(array_node)

    array_node.xpath('item').each_with_index do |item_node, position|
      item_text = text_from(item_node)
      item_key = "#{array_name}.#{position}"
      yield item_text, item_key, line_number_from(item_node)
    end
  end
end
# Yields (text, name, line number) for every 'item' child of every node
# matched by '//plurals' on +doc+.
#
# The yielded name is "<plurals name>.<value of the item's quantity attribute>".
#
# NOTE(review): an item with no 'quantity' attribute makes the attribute
# lookup return nil and the following .value call raise - confirm whether
# such input can occur.
def each_plural_entry(doc)
  doc.xpath('//plurals').each do |plurals_node|
    plurals_name = name_from(plurals_node)

    plurals_node.xpath('item').each do |item_node|
      quantity_name = item_node.attributes['quantity'].value
      item_text = text_from(item_node)
      item_key = "#{plurals_name}.#{quantity_name}"
      yield item_text, item_key, line_number_from(item_node)
    end
  end
end
# Produces the string content of +node+.
#
# The node's children are re-emitted through +serialize+ into a scratch
# document under a throwaway <root> element; everything beneath that root
# is then rendered with to_xml and passed through strip_enclosing_quotes.
def text_from(node)
  scratch = Nokogiri::XML::Builder.new do |xml|
    xml.root do
      node.children.each { |child| serialize(child, xml) }
    end
  end

  inner_markup = scratch.doc.xpath('/root/node()').to_xml
  strip_enclosing_quotes(inner_markup)
end
# Recursively copies +node+ into +builder+.
#
# Text nodes are added as text after being run through +unescape+. Any
# other node is re-created as an element named after the node, carrying the
# node's attributes, with its children serialized inside it.
#
# NOTE(review): every node for which text? is false takes the element
# branch; if comments or CDATA sections can appear inside a string they
# would be emitted as elements too - confirm against real input.
def serialize(node, builder)
  return builder.text(unescape(node.text)) if node.text?

  # The element is created by calling "<tag name>_" on the builder;
  # presumably this relies on Nokogiri::XML::Builder treating a trailing
  # underscore as a way to avoid clashes with existing methods - confirm.
  tag_method = "#{node.name}_"
  builder.send(tag_method, node.attributes) do
    node.children.each { |child| serialize(child, builder) }
  end
end
# Returns the value of +node+'s 'name' attribute, or nil when the node has
# no such attribute.
def name_from(node)
  name_attribute = node.attributes['name']
  name_attribute.value if name_attribute
end
# Line number to report for +node+; always nil.
#
# Nokogiri does expose `node.line`, but it reports incorrect line numbers
# when running on JRuby, so it is deliberately not used here. See:
# https://github.com/sparklemotion/nokogiri/issues/1223
def line_number_from(node)
  nil
end
# Returns a copy of +text+ with backslash-escaped single quotes and then
# backslash-escaped double quotes replaced by the bare quote character.
# No other escape sequence is touched.
def unescape(text)
  # Hash order is insertion order, so single quotes are handled first.
  replacements = { "\\'" => "'", '\\"' => '"' }

  replacements.reduce(text) do |unescaped, (escaped, literal)|
    unescaped.gsub(escaped, literal)
  end
end
# Removes one pair of matching quotes wrapped around +text+.
#
# The quotes are stripped only when +text+ both starts and ends with the
# same quote character (' or ") and is at least two characters long;
# anything else is returned unchanged. Quotes inside the value are kept.
#
# Unlike a single-line regex match, this also works when the quoted value
# contains newlines.
#
# @param text [String] the serialized string content
# @return [String] +text+ without its enclosing quote pair, or +text+ itself
def strip_enclosing_quotes(text)
  opening = text[0]
  return text unless opening == "'" || opening == '"'
  # A lone quote character has no closing partner to strip.
  return text unless text.length > 1 && text.end_with?(opening)

  text[1..-2]
end
end
end
end
end