Class: DataDownloader

Inherits:
Object
  • Object
show all
Defined in:
lib/data_downloader.rb

Overview

This class downloads data files from the MLB Gameday site to local storage. Downloaded data is stored under a path that mirrors the MLB Gameday directory layout. For example, the path for a specific date looks like this:

/components/game/mlb/year_2008/month_04/day_07

Constant Summary collapse

FILE_BASE_PATH =
'components/game/mlb'

Instance Method Summary collapse

Instance Method Details

#download_all_for_date(year, month, day) ⇒ Object



148
149
150
151
152
153
154
155
156
# File 'lib/data_downloader.rb', line 148

# Downloads everything for a single date: the games index page, the
# day-level XML files, and then all per-game files for every game that
# Game.find_by_date reports for that date.
#
# year, month and day are forwarded unchanged to get_day_path,
# GamedayFetcher, download_xml_for_date and Game.find_by_date.
#
# @param year the year of the date to download
# @param month the month of the date to download
# @param day the day of the date to download
# @return the collection returned by Game.find_by_date
def download_all_for_date(year, month, day)
  games_page = "#{get_day_path(year, month, day)}/games.html"
  write_file(games_page, GamedayFetcher.fetch_games_page(year, month, day))
  download_xml_for_date(year, month, day)
  Game.find_by_date(year, month, day).each { |game| download_all_for_game(game.gid) }
end

#download_all_for_game(gid) ⇒ Object

Downloads all data files associated with the game specified by the game id passed in.



21
22
23
24
25
26
27
28
29
# File 'lib/data_downloader.rb', line 21

# Downloads all data files associated with the game identified by +gid+
# by running each per-category downloader in a fixed order.
#
# @param gid the game id, forwarded unchanged to every downloader
# @return whatever the last step (download_pitchers_for_game) returns
def download_all_for_game(gid)
  steps = %i[
    download_xml_for_game
    download_batters_for_game
    download_inning_for_game
    download_media_for_game
    download_notification_for_game
    download_onbase_for_game
    download_pitchers_for_game
  ]
  # map + last keeps the return value of the final step, as before.
  steps.map { |step| send(step, gid) }.last
end

#download_all_for_month(year, month) ⇒ Object



159
160
161
162
163
164
165
166
# File 'lib/data_downloader.rb', line 159

# Downloads all data for every day of the given month, printing a
# progress line to standard output before each day.
#
# year and month are converted with to_i only to build the date range;
# the original values are what get passed to download_all_for_date, with
# the day passed as a String.
#
# @param year the year (must respond to to_i)
# @param month the month (must respond to to_i)
# @return [Range] the range of dates that was iterated
def download_all_for_month(year, month)
  first_of_month = Date.new(year.to_i, month.to_i, 1)
  last_of_month = Date.new(year.to_i, month.to_i, -1) # day -1 is the last day of the month
  (first_of_month..last_of_month).each do |date|
    puts "Downloading #{year}/#{month}/#{date.day}"
    download_all_for_date(year, month, date.day.to_s)
  end
end

#download_all_for_range(year, start_month, start_day, end_month, end_day) ⇒ Object



169
170
171
172
173
174
175
176
# File 'lib/data_downloader.rb', line 169

# Downloads all data for every day from start_month/start_day through
# end_month/end_day (inclusive) of a single year, printing a progress
# line to standard output before each day.
#
# All arguments are converted with to_i to build the dates; each day is
# then passed to download_all_for_date as three Strings.
#
# @param year the year shared by both ends of the range
# @param start_month month of the first day
# @param start_day day of month of the first day
# @param end_month month of the last day
# @param end_day day of month of the last day
# @return [Range] the range of dates that was iterated
def download_all_for_range(year, start_month, start_day, end_month, end_day)
  first_day = Date.new(year.to_i, start_month.to_i, start_day.to_i)
  last_day = Date.new(year.to_i, end_month.to_i, end_day.to_i)
  (first_day..last_day).each do |date|
    y, m, d = [date.year, date.month, date.day].map(&:to_s)
    puts "Downloading #{y}/#{m}/#{d}"
    download_all_for_date(y, m, d)
  end
end

#download_batters_for_game(gid) ⇒ Object



32
33
34
35
36
37
38
39
40
41
# File 'lib/data_downloader.rb', line 32

# Downloads the batters index page and the per-batter XML files for a game.
#
# batters.html is always written. Each batter file under "batters/" is
# only fetched and written when no file exists at that path yet.
#
# @param gid the game id, forwarded to get_gid_path, GamedayFetcher and Batter
# @return the ids returned by Batter.get_all_ids_for_game
def download_batters_for_game(gid)
  gid_path = get_gid_path(gid)
  write_file("#{gid_path}/batters.html", GamedayFetcher.fetch_batters_page(gid))
  batter_path = "#{gid_path}/batters"
  Batter.get_all_ids_for_game(gid).each do |id|
    batter_file = "#{batter_path}/#{id}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(batter_file)

    write_file(batter_file, GamedayFetcher.fetch_batter(gid, id))
  end
end

#download_inning_for_game(gid) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/data_downloader.rb', line 56

# Downloads the inning XML files for a game into its "inning" directory:
# one inning_N.xml per inning (1 up to Game#get_num_innings), plus
# inning_Scores.xml and inning_hit.xml.
#
# A file is only fetched and written when nothing exists at its path yet.
#
# @param gid the game id, forwarded to Game.new, get_gid_path and GamedayFetcher
# @return the result of the final write_file call, or nil when
#   inning_hit.xml already exists
def download_inning_for_game(gid)
  inn_count = Game.new(gid).get_num_innings
  inn_path = "#{get_gid_path(gid)}/inning"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  (1..inn_count).each do |inn|
    inning_file = "#{inn_path}/inning_#{inn}.xml"
    write_file(inning_file, GamedayFetcher.fetch_inningx(gid, inn)) unless File.exist?(inning_file)
  end

  scores_file = "#{inn_path}/inning_Scores.xml"
  write_file(scores_file, GamedayFetcher.fetch_inning_scores(gid)) unless File.exist?(scores_file)

  hit_file = "#{inn_path}/inning_hit.xml"
  write_file(hit_file, GamedayFetcher.fetch_inning_hit(gid)) unless File.exist?(hit_file)
end

#download_media_for_game(gid) ⇒ Object



74
75
76
77
78
79
80
81
82
# File 'lib/data_downloader.rb', line 74

# Downloads the media XML files (highlights.xml and mobile.xml) for a
# game into its "media" directory.
#
# A file is only fetched and written when nothing exists at its path yet.
#
# @param gid the game id, forwarded to get_gid_path and GamedayFetcher
# @return the result of the final write_file call, or nil when
#   mobile.xml already exists
def download_media_for_game(gid)
  media_path = "#{get_gid_path(gid)}/media"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  highlights_file = "#{media_path}/highlights.xml"
  write_file(highlights_file, GamedayFetcher.fetch_media_highlights(gid)) unless File.exist?(highlights_file)

  mobile_file = "#{media_path}/mobile.xml"
  write_file(mobile_file, GamedayFetcher.fetch_media_mobile(gid)) unless File.exist?(mobile_file)
end

#download_notification_for_game(gid) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/data_downloader.rb', line 85

# Downloads the notification XML files for a game into its
# "notifications" directory: one notifications_N.xml per inning
# (1 up to Game#get_num_innings) plus notifications_full.xml.
#
# A file is only fetched and written when nothing exists at its path yet.
#
# @param gid the game id, forwarded to Game.new, get_gid_path and GamedayFetcher
# @return the result of the final write_file call, or nil when
#   notifications_full.xml already exists
def download_notification_for_game(gid)
  inn_count = Game.new(gid).get_num_innings
  notif_path = "#{get_gid_path(gid)}/notifications"

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  (1..inn_count).each do |inn|
    inning_file = "#{notif_path}/notifications_#{inn}.xml"
    next if File.exist?(inning_file)

    write_file(inning_file, GamedayFetcher.fetch_notifications_inning(gid, inn))
  end

  full_file = "#{notif_path}/notifications_full.xml"
  write_file(full_file, GamedayFetcher.fetch_notifications_full(gid)) unless File.exist?(full_file)
end

#download_onbase_for_game(gid) ⇒ Object



100
101
102
103
104
# File 'lib/data_downloader.rb', line 100

# Downloads the on-base XML files (linescore.xml and plays.xml) for a
# game into its "onbase" directory. Both files are always written; there
# is no check for an existing file.
#
# @param gid the game id, forwarded to get_gid_path and GamedayFetcher
# @return the result of the write_file call for plays.xml
def download_onbase_for_game(gid)
  onbase_dir = "#{get_gid_path(gid)}/onbase"

  linescore = GamedayFetcher.fetch_onbase_linescore(gid)
  write_file("#{onbase_dir}/linescore.xml", linescore)

  plays = GamedayFetcher.fetch_onbase_plays(gid)
  write_file("#{onbase_dir}/plays.xml", plays)
end

#download_pitchers_for_game(gid) ⇒ Object



44
45
46
47
48
49
50
51
52
53
# File 'lib/data_downloader.rb', line 44

# Downloads the pitchers index page and the per-pitcher XML files for a game.
#
# pitchers.html is always written. Each pitcher file under "pitchers/" is
# only fetched and written when no file exists at that path yet.
#
# @param gid the game id, forwarded to get_gid_path, GamedayFetcher and Pitcher
# @return the ids returned by Pitcher.get_all_ids_for_game
def download_pitchers_for_game(gid)
  gid_path = get_gid_path(gid)
  write_file("#{gid_path}/pitchers.html", GamedayFetcher.fetch_pitchers_page(gid))
  pitcher_path = "#{gid_path}/pitchers"
  Pitcher.get_all_ids_for_game(gid).each do |id|
    pitcher_file = "#{pitcher_path}/#{id}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(pitcher_file)

    write_file(pitcher_file, GamedayFetcher.fetch_pitcher(gid, id))
  end
end

#download_xml_for_date(year, month, day) ⇒ Object



140
141
142
143
144
145
# File 'lib/data_downloader.rb', line 140

# Downloads the day-level XML files for a date: epg.xml,
# master_scoreboard.xml and media/highlights.xml, all placed under the
# path returned by get_day_path. The files are always written; there is
# no check for an existing file.
#
# @param year forwarded unchanged to get_day_path and GamedayFetcher
# @param month forwarded unchanged to get_day_path and GamedayFetcher
# @param day forwarded unchanged to get_day_path and GamedayFetcher
# @return the result of the write_file call for media/highlights.xml
def download_xml_for_date(year, month, day)
  date_args = [year, month, day]
  day_dir = get_day_path(*date_args)

  epg = GamedayFetcher.fetch_epg(*date_args)
  write_file("#{day_dir}/epg.xml", epg)

  scoreboard = GamedayFetcher.fetch_scoreboard(*date_args)
  write_file("#{day_dir}/master_scoreboard.xml", scoreboard)

  highlights = GamedayFetcher.fetch_day_highlights(*date_args)
  write_file("#{day_dir}/media/highlights.xml", highlights)
end

#download_xml_for_game(gid) ⇒ Object

Downloads the top-level XML files for the game specified by the given game id. The files include:

bench.xml
benchO.xml
boxscore.xml
emailSource.xml
eventLog.xml
game.xml
game_events.xml
gamecenter.xml
gameday_Syn.xml
linescore.xml
miniscoreboard.xml
players.xml
plays.xml


122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/data_downloader.rb', line 122

# Downloads the top-level XML files for a game into the path returned by
# get_gid_path. Each file is fetched with its matching GamedayFetcher
# method and always written; there is no check for an existing file.
#
# @param gid the game id, forwarded to get_gid_path and GamedayFetcher
# @return the result of the write_file call for plays.xml
def download_xml_for_game(gid)
  gid_path = get_gid_path(gid)
  # File name => GamedayFetcher method; insertion order is download order.
  fetchers = {
    'bench.xml' => :fetch_bench,
    'benchO.xml' => :fetch_bencho,
    'boxscore.xml' => :fetch_boxscore,
    'emailSource.xml' => :fetch_emailsource,
    'eventLog.xml' => :fetch_eventlog,
    'game.xml' => :fetch_game_xml,
    'game_events.xml' => :fetch_game_events,
    'gamecenter.xml' => :fetch_gamecenter_xml,
    'gameday_Syn.xml' => :fetch_gamedaysyn,
    'linescore.xml' => :fetch_linescore,
    'miniscoreboard.xml' => :fetch_miniscoreboard,
    'players.xml' => :fetch_players,
    'plays.xml' => :fetch_plays
  }
  # map + last keeps the return value of the final write_file call.
  fetchers.map do |file_name, fetcher|
    write_file("#{gid_path}/#{file_name}", GamedayFetcher.public_send(fetcher, gid))
  end.last
end

#tmp_fetch_pages_for_game(gid) ⇒ Object



198
199
200
201
# File 'lib/data_downloader.rb', line 198

# Fetches and writes only the batters.html and pitchers.html pages for a
# game, under the path returned by get_gid_path. Both pages are always
# written; there is no check for an existing file.
#
# @param gid the game id, forwarded to get_gid_path and GamedayFetcher
# @return the result of the write_file call for pitchers.html
def tmp_fetch_pages_for_game(gid)
  batters_html = "#{get_gid_path(gid)}/batters.html"
  write_file(batters_html, GamedayFetcher.fetch_batters_page(gid))

  pitchers_html = "#{get_gid_path(gid)}/pitchers.html"
  write_file(pitchers_html, GamedayFetcher.fetch_pitchers_page(gid))
end

#tmp_fetch_pages_for_month(year, month, end_day) ⇒ Object



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/data_downloader.rb', line 179

# For each day from the first of the month through end_day, writes that
# day's games.html page and then the batters/pitchers pages for every
# game Game.find_by_date reports. The day number is printed to standard
# output before each day is processed.
#
# end_day is optional: the body already fell back to the last day of the
# month when end_day was nil, so the parameter now defaults to nil
# instead of forcing callers to pass nil explicitly.
#
# @param year the year (must respond to to_i); also forwarded unchanged
# @param month the month (must respond to to_i); also forwarded unchanged
# @param end_day last day of the month to process (must respond to to_i),
#   or nil to process the whole month
# @return [Range] the range of dates that was iterated
def tmp_fetch_pages_for_month(year, month, end_day = nil)
  start_date = Date.new(year.to_i, month.to_i) # first day of month
  end_date =
    if end_day
      Date.new(year.to_i, month.to_i, end_day.to_i)
    else
      (start_date >> 1) - 1 # last day of month
    end

  (start_date..end_date).each do |dt|
    day = dt.day.to_s
    puts day
    day_path = get_day_path(year, month, day)
    write_file("#{day_path}/games.html", GamedayFetcher.fetch_games_page(year, month, day))
    Game.find_by_date(year, month, day).each do |game|
      tmp_fetch_pages_for_game(game.gid)
    end
  end
end