Module: GoogleScraper

Defined in:
lib/google_scraper.rb,
lib/google_scraper/version.rb

Constant Summary

VERSION = "0.0.1"

Class Method Summary

Class Method Details

.run(query, options = {}) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/google_scraper.rb', line 7

# Scrapes Google (google.ca) result pages for +query+ and writes the hits to a
# CSV file with a "url,text" header row.
#
# Each page is fetched over HTTPS and parsed with Nokogiri. For every <h3>
# element, its first <a> is inspected; anchors whose href starts with "/url"
# (Google's redirect links) are recorded, with the "/url?q=" prefix and the
# trailing "&sa=..." tracking parameters stripped from the URL.
#
# @param query [String] raw (unescaped) search terms
# @param options [Hash, nil] optional settings; omitted keys keep the defaults
# @option options [Integer] :pages (10) number of result pages to fetch
# @option options [String] :output ('output.csv') path of the CSV file to write
# @return [Array<Hash>] collected results, each { :url => String, :text => String }
def self.run(query, options = {})
  options ||= {}
  pages = options.fetch(:pages, 10)
  output = options.fetch(:output, 'output.csv')

  puts "Query is #{query}"
  # URI.escape was removed in Ruby 3.0; encode_www_form_component is the
  # supported way to escape a single query-string value.
  query = URI.encode_www_form_component(query)
  data = []

  pages.times do |count|
    puts "scraping page #{count + 1} with query #{query}"
    # Kernel#open stopped accepting URLs in Ruby 3.0. URI#open (provided by
    # open-uri) works on both old and current Rubies.
    url = "https://www.google.ca/search?q=#{query}&start=#{count * 10}"
    doc = Nokogiri::HTML(URI.parse(url).open)

    headings = doc.search('h3')
    puts "Found #{headings.size} possible results"

    headings.each do |heading|
      anchor = heading.search('a').first
      # Headings without a link, or links without an href, are not results;
      # skip them instead of raising NoMethodError on nil.
      href = anchor && anchor['href']
      next unless href && href.start_with?('/url')

      data << {
        :url => href.gsub(/^\/url\?q\=/, '').gsub(/\&sa=.*$/, ''),
        :text => anchor.content
      }
    end
  end

  CSV.open(output, 'w') do |csv|
    csv << ["url", "text"]
    data.each do |entry|
      begin
        csv << [entry[:url], entry[:text]]
      rescue => e
        # Best effort: one unwritable row must not abort the whole export.
        puts "ugh. #{e} - skipping"
      end
    end
  end

  data
end