-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser_service.rb
68 lines (53 loc) · 1.79 KB
/
parser_service.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# frozen_string_literal: true
module Google
  # Parses the HTML of a search-results response and reports statistics about
  # its advertisement links, its non-advertisement result links and all of its
  # links.
  #
  # While constructing the service the parsed document is annotated in memory:
  # ad links receive the +adwords+ class and footer links receive the
  # +footer-links+ class, so NON_ADS_RESULT_SELECTOR can exclude both groups
  # through its +:not(...)+ clauses.
  #
  # @example
  #   Google::ParserService.new(html_response: response).call
  class ParserService
    # Links counted as regular (non-ad) results: anchors carrying +data-ved+
    # that have no +role+/+jsaction+ attribute and were not tagged as ad or
    # footer links by the constructor.
    # NOTE(review): per the original author's comment, requiring +data-ved+ is
    # what filters out "role=list" sub links — confirm against current markup.
    NON_ADS_RESULT_SELECTOR = 'a[data-ved]:not([role]):not([jsaction]):not(.adwords):not(.footer-links)'

    # Element id used to scope the "top" ad queries.
    AD_CONTAINER_ID = 'tads'

    # CSS class added to every ad link so later queries can find them easily.
    ADWORDS_CLASS = 'adwords'

    # @param html_response [#body] response object whose +body+ holds the HTML;
    #   the concrete response class is not visible from this file
    # @raise [ArgumentError] when the response body is blank
    def initialize(html_response:)
      body = html_response.body
      # `blank?` is not core Ruby — presumably supplied by ActiveSupport.
      raise ArgumentError, 'response.body cannot be blank' if body.blank?

      @html = html_response

      # Parse the body that was just validated rather than the response
      # wrapper, so parsing does not rely on the response object behaving
      # like a String or IO.
      @document = Nokogiri::HTML.parse(body)

      # Tag every ad link with a class for easier querying
      @document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS)

      # Tag footer links so the result selector can exclude them
      @document.css('#footcnt a').add_class('footer-links')
    end

    # Collects the statistics extracted from the parsed document.
    #
    # @return [Hash] with the keys +:ads_top_count+, +:ads_page_count+,
    #   +:ads_top_url+, +:ads_page_url+, +:non_ads_result_count+,
    #   +:non_ads_url+, +:total_link_count+ and +:html+ (the response object
    #   exactly as it was passed to the constructor)
    def call
      {
        ads_top_count: ads_top_count,
        ads_page_count: ads_page_count,
        ads_top_url: ads_top_url,
        ads_page_url: ads_page_url,
        non_ads_result_count: non_ads_result_count,
        non_ads_url: non_ads_url,
        total_link_count: total_link_count,
        html: @html
      }
    end

    private

    # Number of ad links inside the element with id AD_CONTAINER_ID.
    def ads_top_count
      top_ad_links.count
    end

    # Number of ad links anywhere in the document.
    def ads_page_count
      page_ad_links.count
    end

    # +href+ values of the ad links inside the element with id AD_CONTAINER_ID.
    def ads_top_url
      hrefs(top_ad_links)
    end

    # +href+ values of every ad link in the document.
    def ads_page_url
      hrefs(page_ad_links)
    end

    # Number of links matching NON_ADS_RESULT_SELECTOR.
    def non_ads_result_count
      non_ad_links.count
    end

    # +href+ values of the links matching NON_ADS_RESULT_SELECTOR.
    def non_ads_url
      hrefs(non_ad_links)
    end

    # Number of +a+ elements in the document, ads and footer links included.
    def total_link_count
      @document.css('a').count
    end

    # The node sets below are memoized: the document is only modified inside
    # the constructor, so each selector needs to be evaluated just once.

    # Ad links located inside the element with id AD_CONTAINER_ID.
    def top_ad_links
      @top_ad_links ||= @document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}")
    end

    # Every link tagged with ADWORDS_CLASS.
    def page_ad_links
      @page_ad_links ||= @document.css(".#{ADWORDS_CLASS}")
    end

    # Every link matching NON_ADS_RESULT_SELECTOR.
    def non_ad_links
      @non_ad_links ||= @document.css(NON_ADS_RESULT_SELECTOR)
    end

    # Maps a collection of link nodes to their +href+ attribute values
    # (nil for a node without an +href+).
    def hrefs(links)
      links.map { |link| link['href'] }
    end
  end
end