Class: MetacriticGames::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/scraper.rb

Class Method Summary collapse

Class Method Details

.doc ⇒ Object



7
8
9
# File 'lib/scraper.rb', line 7

# Reader for the class variable @@doc.
# In the code shown here, @@doc is only assigned through .doc=, which
# .scrape_platform calls with the parsed index page.
def self.doc
  @@doc
end

.doc=(name) ⇒ Object



3
4
5
# File 'lib/scraper.rb', line 3

# Writer for the class variable @@doc, which .doc reads back.
#
# name - the value to store; .scrape_platform passes the result of
#        Nokogiri::HTML here.
#
# Returns the assigned value.
def self.doc=(name)
  @@doc = name
end

.get_title_platform(game) ⇒ Object

method to clean up text scrape for platform



40
41
42
# File 'lib/scraper.rb', line 40

# Pulls the platform out of a scraped title: takes the first "(...)" group in
# the node's text and removes the parenthesis characters from it.
#
# game - any object responding to #text (a title node from the index page)
#
# Returns the platform String, or nil when the text contains no
# parenthesised group (this case used to raise NoMethodError on nil).
def self.get_title_platform(game)
  parenthesised = game.text.slice(/\(([^)]+)\)/)
  # slice yields nil when nothing matches, so stop before calling delete on it.
  return nil unless parenthesised

  parenthesised.delete("()")
end

.get_title_text(game) ⇒ Object

method to clean up text scrape



35
36
37
# File 'lib/scraper.rb', line 35

# Produces the bare game title from a scraped title node by removing every
# "(...)" group from its text and trimming surrounding whitespace.
#
# game - any object responding to #text
#
# Returns the cleaned title String.
def self.get_title_text(game)
  raw_title = game.text
  without_groups = raw_title.gsub(/\(([^)]+)\)/, "")
  without_groups.strip
end

.get_title_url(game) ⇒ Object

method to convert relative url on index page to absolute url



45
46
47
48
# File 'lib/scraper.rb', line 45

# Builds the absolute URL for a title node found on the index page.
#
# game - node responding to #css; the "href" attribute of its "a" match is
#        read and appended to the site root.
#
# Returns the absolute URL String.
def self.get_title_url(game)
  relative_path = game.css("a").attribute("href").value
  "http://www.metacritic.com" + relative_path
end

.scrape_game(url) ⇒ Object

scrape individual page and return scores and genre listings



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/scraper.rb', line 51

# Fetches a single game page and extracts its scores and genre listings.
#
# url - address of the game page; handed unchanged to open
#
# Returns a Hash with :metascore and :user_score (each a Hash holding the
# scraped text under :platform) and :genre (Array of genre Strings).
def self.scrape_game(url)
  # NOTE(review): VERIFY_NONE disables TLS certificate verification for this
  # request - confirm that is intentional.
  page = open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari')
  parsed = Nokogiri::HTML(page)

  # The progress bar advances once per genre entry found on the page.
  genres = parsed.css("li.summary_detail.product_genre").css("span.data").map do |entry|
    MetacriticGames::CLI.progressbar.increment
    entry.text
  end

  {
    metascore: { platform: parsed.css("div.metascore_w.xlarge").text },
    user_score: { platform: parsed.css(".metascore_anchor .user").text },
    genre: genres
  }
end

.scrape_new_releases ⇒ Object

returns the array of game information hashes from the index page



21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/scraper.rb', line 21

# Walks the title nodes of the cached index page (.doc) and builds one
# information hash per title.
#
# Returns an Array with one entry per title node: a Hash with :name,
# :platform and :url when the title text contains "(", otherwise nil.
# The progress bar is advanced once for every title node, matched or not.
def self.scrape_new_releases
  titles = self.doc.css(".product_wrap .product_title")
  titles.map do |title|
    MetacriticGames::CLI.progressbar.increment
    # Titles without an opening parenthesis contribute nil to the result.
    next unless title.text.include?("(")

    {
      name: self.get_title_text(title),
      platform: self.get_title_platform(title),
      url: self.get_title_url(title)
    }
  end
end

.scrape_platform(url) ⇒ Object

scrapes the page for platforms and stores the parsed document in the class-level doc variable to avoid scraping the index page a second time; returns the platform array to the CLI



12
13
14
15
16
17
18
# File 'lib/scraper.rb', line 12

# Fetches and parses the index page, keeps the parsed document via .doc= so
# it can be reused later, and lists the platform names found on it.
#
# url - address of the index page; handed unchanged to open
#
# Returns an Array with the text of every ".platform_item" node. The
# progress bar is advanced once per node.
def self.scrape_platform(url)
  # NOTE(review): VERIFY_NONE disables TLS certificate verification for this
  # request - confirm that is intentional.
  index_page = open(url, ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE, 'User-Agent' => 'safari')
  self.doc = Nokogiri::HTML(index_page)

  self.doc.css(".platform_item").map do |item|
    MetacriticGames::CLI.progressbar.increment
    item.text
  end
end