-
Notifications
You must be signed in to change notification settings - Fork 0
/
techandciviclife_parser.rb
145 lines (131 loc) · 4.11 KB
/
techandciviclife_parser.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
module Techandciviclife
# Streams the election feed stored at lib/vip.xml and hands every supported
# element to the matching class under Techandciviclife::Parsers.
class Parser
  # Element names that have a parser class in Techandciviclife::Parsers.
  # Frozen so the dispatch whitelist used by #run cannot be mutated at runtime.
  TOP_LEVEL_NODES = %w(
    Office
    Person
    Party
    Candidate
    CandidateContest
    CandidateSelection
    ElectoralDistrict
  ).freeze

  # Elements that were considered and are deliberately not imported:
  #   BallotMeasureContest   - nope
  #   BallotMeasureSelection - nope
  #   RetentionContest       - nope
  #   Selection              - what? (purpose still unclear)

  # Builds a streaming reader over the feed.
  #
  # @param io [IO, nil] an already-open handle to read from. When omitted the
  #   feed file is opened here, and closing it is left to the caller.
  # @return [Nokogiri::XML::Reader]
  def reader(io = nil)
    Nokogiri::XML::Reader(io || File.open(feed_path))
  end

  # Wipes the previously imported records, then walks the feed once and
  # parses every element whose name is listed in TOP_LEVEL_NODES. Names of
  # all other elements are collected and printed at the end, together with
  # the elapsed time.
  def run
    # Monotonic clock: unaffected by wall-clock adjustments during the import.
    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    clean!
    unknown_node_names = Set.new

    # Block form guarantees the file handle is closed, even if a parser raises.
    File.open(feed_path) do |io|
      reader(io).each do |node|
        # Only opening element nodes are of interest; this also keeps
        # "#text" / "#comment" pseudo-names out of the unknown-node report.
        next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT

        if TOP_LEVEL_NODES.include?(node.name)
          Techandciviclife::Parsers.const_get(node.name).new(node).parse
        else
          unknown_node_names.add(node.name)
        end
      end
    end

    puts "skipped unknown nodes: #{unknown_node_names.inspect}"
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    puts "finished in: #{elapsed} seconds"
  end

  # Calls delete_all on every model that #run populates.
  def clean!
    ::Techandciviclife::Party.delete_all
    ::Techandciviclife::Person.delete_all
    ::Techandciviclife::Candidate.delete_all
    ::Techandciviclife::CandidateContest.delete_all
    ::Techandciviclife::CandidateSelection.delete_all
    ::Techandciviclife::ElectoralDistrict.delete_all
    ::Techandciviclife::Office.delete_all
  end

  private

  # Location of the feed file inside the Rails application.
  def feed_path
    Rails.root + 'lib/vip.xml'
  end
end
module Parsers
# Common setup for the element parsers: keeps the reader node, its id
# attribute, and a standalone document built from the node's outer XML
# (the subclasses run their XPath queries against that document).
class Base
  attr_reader :node
  attr_reader :xml
  attr_reader :id

  # @param node the reader node positioned on the element to import; it must
  #   respond to #outer_xml and #attribute
  def initialize(node)
    @node = node
    element_source = node.outer_xml
    @xml = Nokogiri::XML(element_source)
    @id = node.attribute('id')
  end
end
# Imports a Party element.
class Party < Base
  # Creates a Techandciviclife::Party from the English name text and the
  # element id.
  def parse
    english_name = xml.xpath("//Party/Name/Text[@language='en']").text
    ::Techandciviclife::Party.create!(
      name: english_name,
      internal_id: id
    )
  end
end
# Imports a Person element.
# Possibly redundant: could the Candidate name be used instead, so that this
# element can be ignored?
class Person < Base
  # Prints the id and English full name, then creates a
  # Techandciviclife::Person from them.
  def parse
    full_name = xml.xpath("//Person/FullName/Text[@language='en']").text
    puts [id, full_name]
    ::Techandciviclife::Person.create!(
      name: full_name,
      internal_id: id
    )
  end
end
# Imports a Candidate element.
class Candidate < Base
  # Reads the person reference, party reference and English ballot name,
  # prints them alongside the element id, then creates a
  # Techandciviclife::Candidate.
  def parse
    person_ref = xml.xpath("//Candidate/PersonId").text
    party_ref = xml.xpath("//Candidate/PartyId").text
    ballot_name = xml.xpath("//Candidate/BallotName/Text[@language='en']").text
    puts [id, person_ref, party_ref, ballot_name]

    attributes = {
      name: ballot_name,
      internal_id: id,
      party_id: party_ref,
      person_id: person_ref
    }
    ::Techandciviclife::Candidate.create!(**attributes)
  end
end
# Imports a CandidateContest element.
class CandidateContest < Base
  # Creates a Techandciviclife::CandidateContest. BallotSelectionIds is split
  # on whitespace into an array; every other field is stored as the raw
  # element text.
  def parse
    selection_ids = xml.xpath("//CandidateContest/BallotSelectionIds").text.split(' ')

    # NOTE(review): OfficeIds is kept as one unsplit string, unlike
    # BallotSelectionIds - confirm a contest never references several offices.
    attributes = {
      name: xml.xpath("//CandidateContest/Name").text,
      internal_id: id,
      candidate_selections: selection_ids,
      electoral_district_id: xml.xpath("//CandidateContest/ElectoralDistrictId").text,
      office_id: xml.xpath("//CandidateContest/OfficeIds").text
    }
    ::Techandciviclife::CandidateContest.create!(**attributes)
  end
end
# Imports a CandidateSelection element.
class CandidateSelection < Base
  # Creates a Techandciviclife::CandidateSelection whose candidate_ids are
  # the whitespace-separated tokens of the CandidateIds text.
  def parse
    raw_ids = xml.xpath("//CandidateSelection/CandidateIds").text
    ::Techandciviclife::CandidateSelection.create!(candidate_ids: raw_ids.split(' '), internal_id: id)
  end
end
# Imports an ElectoralDistrict element.
class ElectoralDistrict < Base
  # Creates a Techandciviclife::ElectoralDistrict from the Name and Type text.
  def parse
    district_name = xml.xpath("//ElectoralDistrict/Name").text
    district_type = xml.xpath("//ElectoralDistrict/Type").text

    # NOTE(review): if the model is an ActiveRecord class, `type` is the
    # default single-table-inheritance column - confirm this attribute name
    # is safe there.
    ::Techandciviclife::ElectoralDistrict.create!(name: district_name, internal_id: id, type: district_type)
  end
end
# Imports an Office element.
class Office < Base
  # Creates a Techandciviclife::Office from the English description and name
  # texts plus the electoral district reference.
  def parse
    attributes = {
      description: xml.xpath("//Office/Description/Text[@language='en']").text,
      internal_id: id,
      electoral_district_id: xml.xpath("//Office/ElectoralDistrictId").text,
      name: xml.xpath("//Office/Name/Text[@language='en']").text
    }
    ::Techandciviclife::Office.create!(**attributes)
  end
end
end
end