Class: BggHotnessCLI::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/bgg-hotness-cli/scraper.rb

Instance Method Summary collapse

Constructor Details

#initialize(path) ⇒ Scraper

Initialize with an API path to open



4
5
6
# File 'lib/bgg-hotness-cli/scraper.rb', line 4

# Opens the API resource at +path+ and parses the response so the other
# methods can query it through @doc.
#
# The block form of URI.open is used so the IO handle returned by open-uri is
# closed as soon as parsing has finished, rather than staying open until it is
# garbage collected.
#
# @param path [String] API path (URL) to open
def initialize(path)
  @doc = URI.open(path) { |response| Nokogiri::HTML(response) }
end

Instance Method Details

#game_list ⇒ Object

Get the initial list of games from the hotness list.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/bgg-hotness-cli/scraper.rb', line 9

# Creates a BggHotnessCLI::Game for every <item> element in the parsed
# document.
#
# Each game is constructed with the item's name, id, publication year and
# rank, in that argument order. The new instances are not collected here.
# NOTE(review): presumably Game.new registers each instance itself -- confirm
# against the Game class.
#
# @return [Object] the collection of matched items (the receiver of +each+)
def game_list
  @doc.css('item').each do |item|
    # name and yearpublished are child elements that carry their data in a
    # "value" attribute; rank and id are attributes of the item itself.
    name = item.at_css('name')['value']
    year = item.at_css('yearpublished')['value']

    # Only the construction matters here, so the result is not assigned.
    BggHotnessCLI::Game.new(name, item['id'], year, item['rank'])
  end
end

#get_details(game) ⇒ Object

Get the details from the game’s details page via API. This could be done during the initial loop, but it’s done per game request to save initial load time.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/bgg-hotness-cli/scraper.rb', line 28

# Populates the detail attributes of +game+ from the parsed document.
#
# Sets description, the player/playtime/age limits, the category, mechanic,
# publisher and designer lists, and the game's boardgamegeek.com URL.
#
# @param game [Object] object exposing the writers used below and an +id+ reader
# @return [String] the URL assigned to +game.url+
def get_details(game)
  # The description text is parsed again as HTML to clean up HTML codes such
  # as &nbsp; and line breaks.
  game.description = Nokogiri::HTML.parse(@doc.css('description').text).text

  # Single-valued fields: the "value" attribute of the first matching element.
  game.minplayers = @doc.at_css('minplayers')['value']
  game.maxplayers = @doc.at_css('maxplayers')['value']
  game.minplaytime = @doc.at_css('minplaytime')['value']
  game.maxplaytime = @doc.at_css('maxplaytime')['value']
  game.minage = @doc.at_css('minage')['value']

  # Query the <link> elements once and reuse them for every link type,
  # instead of searching the whole document again for each list.
  links = @doc.css('link')
  values_of = lambda do |type|
    links.select { |link| link['type'] == type }.map { |link| link['value'] }
  end

  game.category = values_of.call('boardgamecategory')
  game.mechanic = values_of.call('boardgamemechanic')
  game.publisher = values_of.call('boardgamepublisher')
  game.designer = values_of.call('boardgamedesigner')

  # The URL formula isn't via API. It's just boardgamegeek's URL scheme.
  game.url = "https://boardgamegeek.com/boardgame/#{game.id}"
end