/
parser_service.rb
66 lines (51 loc) · 1.85 KB
/
parser_service.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# frozen_string_literal: true
module Google
  # Extracts link statistics (ads vs. non-ads) from the HTML of a results page.
  #
  # On construction the HTML is parsed and two groups of anchors are tagged
  # with marker CSS classes inside the in-memory document, so that every query
  # method below can be expressed as a plain CSS selector:
  #
  # * anchors matching AD_LINK_SELECTOR receive ADWORDS_CLASS
  # * anchors matching FOOTER_LINK_SELECTOR receive FOOTER_LINKS_CLASS
  class ParserService
    # id of the element wrapping the ads counted as "top" ads.
    AD_CONTAINER_ID = 'tads'
    # Marker class added to every ad link found in the document.
    ADWORDS_CLASS = 'adwords'
    # Marker class added to every footer link found in the document.
    FOOTER_LINKS_CLASS = 'footer-links'

    # Anchors that get tagged as ad links. Per the original author's note,
    # requiring `data-ved` is what filters out the "role=list" sub-links.
    AD_LINK_SELECTOR = 'div[data-text-ad] a[data-ved]'
    # Anchors that get tagged as footer links.
    FOOTER_LINK_SELECTOR = '#footcnt a'

    # Tagged ad links located inside the top ads container.
    TOP_ADS_SELECTOR = "##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}".freeze
    # Tagged ad links anywhere on the page.
    PAGE_ADS_SELECTOR = ".#{ADWORDS_CLASS}".freeze
    # Result links that are neither ads, footer links, nor anchors carrying a
    # `role` or `jsaction` attribute. Built from the marker-class constants so
    # the selector cannot drift from the classes applied in #initialize.
    NON_ADS_RESULT_SELECTOR =
      "a[data-ved]:not([role]):not([jsaction]):not(.#{ADWORDS_CLASS}):not(.#{FOOTER_LINKS_CLASS})".freeze

    # @param html_response [#body] response object whose `body` holds the page HTML
    # @raise [ArgumentError] when `html_response.body` is blank
    #   (`blank?` is presumably ActiveSupport's -- confirm it is loaded)
    def initialize(html_response:)
      raise ArgumentError, 'response.body cannot be blank' if html_response.body.blank?

      @html = html_response
      # Parse the body that was just validated: Nokogiri expects a String or
      # an IO, not an arbitrary response wrapper.
      @document = Nokogiri::HTML.parse(html_response.body)

      # Add a class to all AdWords links for easier manipulation
      @document.css(AD_LINK_SELECTOR).add_class(ADWORDS_CLASS)
      # Mark footer links so they can be excluded from the non-ads results
      @document.css(FOOTER_LINK_SELECTOR).add_class(FOOTER_LINKS_CLASS)
    end

    # Writes the parsed data directly into the given keyword object.
    #
    # `keyword.html` receives the response object exactly as it was passed to
    # the constructor (not its body).
    #
    # @param keyword [Object] object exposing the writers used below
    # @return [Object] the same keyword object
    def parse_into!(keyword)
      keyword.ads_top_count = ads_top_count
      keyword.ads_page_count = ads_page_count
      keyword.ads_top_url = ads_top_url
      keyword.ads_page_url = ads_page_url
      keyword.non_ads_result_count = non_ads_result_count
      keyword.total_link_count = total_link_count
      keyword.html = @html
      keyword
    end

    # @return [Integer] number of ad links inside the top ads container
    def ads_top_count
      @document.css(TOP_ADS_SELECTOR).count
    end

    # @return [Integer] number of ad links on the whole page
    def ads_page_count
      @document.css(PAGE_ADS_SELECTOR).count
    end

    # @return [Array] `href` value of each ad link inside the top ads container
    def ads_top_url
      hrefs(TOP_ADS_SELECTOR)
    end

    # @return [Array] `href` value of each ad link on the whole page
    def ads_page_url
      hrefs(PAGE_ADS_SELECTOR)
    end

    # @return [Integer] number of links matching NON_ADS_RESULT_SELECTOR
    def non_ads_result_count
      @document.css(NON_ADS_RESULT_SELECTOR).count
    end

    # @return [Array] `href` value of each link matching NON_ADS_RESULT_SELECTOR
    def non_ads_url
      hrefs(NON_ADS_RESULT_SELECTOR)
    end

    # @return [Integer] number of `a` elements in the document
    def total_link_count
      @document.css('a').count
    end

    private

    # Collects the `href` attribute of every node matching +selector+.
    def hrefs(selector)
      @document.css(selector).map { |a_tag| a_tag['href'] }
    end
  end
end