Skip to content

Commit

Permalink
[#7] Use constants to DRY parser css selectors
Browse files Browse the repository at this point in the history
  • Loading branch information
malparty committed Jun 23, 2021
1 parent 2ef2cba commit 41fea83
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions app/services/google/parser_service.rb
Expand Up @@ -3,16 +3,18 @@
module Google
class ParserService
NON_ADS_RESULT_SELECTOR = 'a[data-ved]:not([role]):not([jsaction]):not(.adwords):not(.footer-links)'
AD_CONTAINER_ID = 'tads'
ADWORDS_CLASS = 'adwords'

def initialize(html_response:)
raise ArgumentError, 'response.body cannot be nil' if html_response.body.blank?
raise ArgumentError, 'response.body cannot be blank' if html_response.body.blank?

@html = html_response

@document = Nokogiri::HTML.parse(html_response)

# Add a class to all AdWords links for easier manipulation
@document.css('div[data-text-ad] a[data-ved]').add_class('adwords')
@document.css('div[data-text-ad] a[data-ved]').add_class(ADWORDS_CLASS)

# Mark footer links to identify them
@document.css('#footcnt a').add_class('footer-links')
Expand All @@ -33,20 +35,20 @@ def parse_into!(keyword)
end

# Counts the AdWords links located inside the top ads container.
#
# The selector is built from AD_CONTAINER_ID (container id) and
# ADWORDS_CLASS (class added to ad links), so the document is queried
# exactly once instead of repeating a hard-coded copy of the selector.
#
# @return [Integer] number of elements matching "#<AD_CONTAINER_ID> .<ADWORDS_CLASS>"
def ads_top_count
  @document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}").count
end

# Counts every AdWords link found anywhere in the parsed page.
#
# Uses ADWORDS_CLASS for the selector and queries the document a single
# time (the duplicated hard-coded query was discarded work).
#
# @return [Integer] number of elements matching ".<ADWORDS_CLASS>"
def ads_page_count
  @document.css(".#{ADWORDS_CLASS}").count
end

# Collects the href of each AdWords link inside the top ads container.
#
# NOTE(review): the original comment says data-ved is what filters out
# "role=list" (sub link) items; that attribute is not part of this
# selector, so the filtering presumably happens where ADWORDS_CLASS is
# assigned to the links — confirm against the constructor.
#
# @return [Array<String, nil>] href attribute of every element matching
#   "#<AD_CONTAINER_ID> .<ADWORDS_CLASS>", in document order
def ads_top_url
  top_ads = @document.css("##{AD_CONTAINER_ID} .#{ADWORDS_CLASS}")
  top_ads.map { |a_tag| a_tag['href'] }
end

# Collects the href of every AdWords link found anywhere in the page.
#
# Uses ADWORDS_CLASS for the selector and queries the document a single
# time (the duplicated hard-coded query and map were discarded work).
#
# @return [Array<String, nil>] href attribute of every element matching
#   ".<ADWORDS_CLASS>", in document order
def ads_page_url
  page_ads = @document.css(".#{ADWORDS_CLASS}")
  page_ads.map { |a_tag| a_tag['href'] }
end

def non_ads_result_count
Expand Down

0 comments on commit 41fea83

Please sign in to comment.