Class: DataDownloader
- Inherits:
-
Object
- Object
- DataDownloader
- Defined in:
- lib/data_downloader.rb
Overview
This class is used to download data files from the MLB Gameday site to local storage. The downloaded data is stored in a path that replicates the MLB Gameday paths. For example, here is a sample path for a specified date:
/components/game/mlb/year_2008/month_04/day_07
Constant Summary collapse
- FILE_BASE_PATH =
'components/game/mlb'
Instance Method Summary collapse
- #download_all_for_date(year, month, day) ⇒ Object
-
#download_all_for_game(gid) ⇒ Object
Downloads all data files associated with the game specified by the game id passed in.
- #download_all_for_month(year, month) ⇒ Object
- #download_all_for_range(year, start_month, start_day, end_month, end_day) ⇒ Object
- #download_batters_for_game(gid) ⇒ Object
- #download_inning_for_game(gid) ⇒ Object
- #download_media_for_game(gid) ⇒ Object
- #download_notification_for_game(gid) ⇒ Object
- #download_onbase_for_game(gid) ⇒ Object
- #download_pitchers_for_game(gid) ⇒ Object
- #download_xml_for_date(year, month, day) ⇒ Object
-
#download_xml_for_game(gid) ⇒ Object
Downloads the top-level xml files for the game specified by the passed-in game id.
- #tmp_fetch_pages_for_game(gid) ⇒ Object
- #tmp_fetch_pages_for_month(year, month, end_day) ⇒ Object
Instance Method Details
#download_all_for_date(year, month, day) ⇒ Object
148 149 150 151 152 153 154 155 156 |
# File 'lib/data_downloader.rb', line 148
# Downloads everything for a single date: the games page, the day-level xml
# files, and then every data file for each game found on that date.
#
# @param year  year of the date; forwarded unchanged to the helpers used here
# @param month month of the date; forwarded unchanged
# @param day   day of the date; forwarded unchanged
def download_all_for_date(year, month, day)
  games_page_file = "#{get_day_path(year, month, day)}/games.html"
  write_file(games_page_file, GamedayFetcher.fetch_games_page(year, month, day))

  download_xml_for_date(year, month, day)

  # One full per-game download for every game Game.find_by_date reports.
  Game.find_by_date(year, month, day).each { |game| download_all_for_game(game.gid) }
end
#download_all_for_game(gid) ⇒ Object
Downloads all data files associated with the game specified by the game id passed in.
21 22 23 24 25 26 27 28 29 |
# File 'lib/data_downloader.rb', line 21
# Downloads all data files associated with the game specified by the game id
# passed in, by running every per-game download step in a fixed order.
#
# @param gid game id handed unchanged to each download_*_for_game step
# @return whatever the final step (download_pitchers_for_game) returns
def download_all_for_game(gid)
  steps = %i[
    download_xml_for_game
    download_batters_for_game
    download_inning_for_game
    download_media_for_game
    download_notification_for_game
    download_onbase_for_game
    download_pitchers_for_game
  ]
  # map + last keeps the method's value equal to the result of the last step.
  steps.map { |step| send(step, gid) }.last
end
#download_all_for_month(year, month) ⇒ Object
159 160 161 162 163 164 165 166 |
# File 'lib/data_downloader.rb', line 159
# Downloads all data for every day of the given month, one day at a time,
# printing a progress line before each day.
#
# @param year  year; converted with to_i to build the dates, passed through
#   as given to download_all_for_date
# @param month month; converted with to_i to build the dates, passed through
#   as given to download_all_for_date
def download_all_for_month(year, month)
  first_of_month = Date.new(year.to_i, month.to_i)
  # A day of -1 selects the last day of the month.
  last_of_month = Date.new(year.to_i, month.to_i, -1)

  (first_of_month..last_of_month).each do |date|
    day = date.day.to_s
    puts "Downloading #{year}/#{month}/#{day}"
    download_all_for_date(year, month, day)
  end
end
#download_all_for_range(year, start_month, start_day, end_month, end_day) ⇒ Object
169 170 171 172 173 174 175 176 |
# File 'lib/data_downloader.rb', line 169
# Downloads all data for every date from the start date through the end date
# (inclusive) within a single year, printing a progress line before each day.
#
# @param year        year shared by both ends of the range (converted with to_i)
# @param start_month month of the first date (converted with to_i)
# @param start_day   day of the first date (converted with to_i)
# @param end_month   month of the last date (converted with to_i)
# @param end_day     day of the last date (converted with to_i)
def download_all_for_range(year, start_month, start_day, end_month, end_day)
  first_date = Date.new(year.to_i, start_month.to_i, start_day.to_i)
  last_date = Date.new(year.to_i, end_month.to_i, end_day.to_i)

  (first_date..last_date).each do |date|
    # download_all_for_date receives the date parts as strings.
    y, m, d = [date.year, date.month, date.day].map(&:to_s)
    puts "Downloading #{y}/#{m}/#{d}"
    download_all_for_date(y, m, d)
  end
end
#download_batters_for_game(gid) ⇒ Object
32 33 34 35 36 37 38 39 40 41 |
# File 'lib/data_downloader.rb', line 32
# Downloads the batters page for a game, then one xml file per batter id.
# The batters page is always rewritten; a batter file that is already present
# on disk is left untouched and not fetched again.
#
# @param gid game id used to resolve the local game path and to fetch data
def download_batters_for_game(gid)
  gid_path = get_gid_path(gid)
  write_file("#{gid_path}/batters.html", GamedayFetcher.fetch_batters_page(gid))

  batter_path = "#{gid_path}/batters"
  Batter.get_all_ids_for_game(gid).each do |id|
    batter_file = "#{batter_path}/#{id}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(batter_file)

    write_file(batter_file, GamedayFetcher.fetch_batter(gid, id))
  end
end
#download_inning_for_game(gid) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
# File 'lib/data_downloader.rb', line 56
# Downloads the inning files for a game: one inning_N.xml for each inning
# from 1 to the count reported by Game#get_num_innings, plus
# inning_Scores.xml and inning_hit.xml. Any file already present on disk is
# left untouched and not fetched again.
#
# @param gid game id used to build the Game, resolve the local game path and
#   fetch data
def download_inning_for_game(gid)
  inn_count = Game.new(gid).get_num_innings
  inn_path = "#{get_gid_path(gid)}/inning"

  (1..inn_count).each do |inn|
    inning_file = "#{inn_path}/inning_#{inn}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(inning_file)

    write_file(inning_file, GamedayFetcher.fetch_inningx(gid, inn))
  end

  scores_file = "#{inn_path}/inning_Scores.xml"
  write_file(scores_file, GamedayFetcher.fetch_inning_scores(gid)) unless File.exist?(scores_file)

  hit_file = "#{inn_path}/inning_hit.xml"
  write_file(hit_file, GamedayFetcher.fetch_inning_hit(gid)) unless File.exist?(hit_file)
end
#download_media_for_game(gid) ⇒ Object
74 75 76 77 78 79 80 81 82 |
# File 'lib/data_downloader.rb', line 74
# Downloads the media files for a game (highlights.xml and mobile.xml) into
# the game's media directory. A file already present on disk is left
# untouched and not fetched again.
#
# @param gid game id used to resolve the local game path and to fetch data
def download_media_for_game(gid)
  media_path = "#{get_gid_path(gid)}/media"

  highlights_file = "#{media_path}/highlights.xml"
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  write_file(highlights_file, GamedayFetcher.fetch_media_highlights(gid)) unless File.exist?(highlights_file)

  mobile_file = "#{media_path}/mobile.xml"
  write_file(mobile_file, GamedayFetcher.fetch_media_mobile(gid)) unless File.exist?(mobile_file)
end
#download_notification_for_game(gid) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/data_downloader.rb', line 85
# Downloads the notification files for a game: one notifications_N.xml for
# each inning from 1 to the count reported by Game#get_num_innings, plus
# notifications_full.xml. Any file already present on disk is left untouched
# and not fetched again.
#
# @param gid game id used to build the Game, resolve the local game path and
#   fetch data
def download_notification_for_game(gid)
  inn_count = Game.new(gid).get_num_innings
  notif_path = "#{get_gid_path(gid)}/notifications"

  (1..inn_count).each do |inn|
    inning_file = "#{notif_path}/notifications_#{inn}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(inning_file)

    write_file(inning_file, GamedayFetcher.fetch_notifications_inning(gid, inn))
  end

  full_file = "#{notif_path}/notifications_full.xml"
  write_file(full_file, GamedayFetcher.fetch_notifications_full(gid)) unless File.exist?(full_file)
end
#download_onbase_for_game(gid) ⇒ Object
100 101 102 103 104 |
# File 'lib/data_downloader.rb', line 100
# Downloads the onbase files for a game (linescore.xml and plays.xml) into
# the game's onbase directory. Both files are written unconditionally.
#
# @param gid game id used to resolve the local game path and to fetch data
def download_onbase_for_game(gid)
  onbase_dir = "#{get_gid_path(gid)}/onbase"
  # file name => GamedayFetcher class method that retrieves its contents
  fetchers = {
    'linescore.xml' => :fetch_onbase_linescore,
    'plays.xml' => :fetch_onbase_plays
  }
  # map + last keeps the method's value equal to the final write_file result.
  fetchers.map do |file_name, fetcher|
    write_file("#{onbase_dir}/#{file_name}", GamedayFetcher.public_send(fetcher, gid))
  end.last
end
#download_pitchers_for_game(gid) ⇒ Object
44 45 46 47 48 49 50 51 52 53 |
# File 'lib/data_downloader.rb', line 44
# Downloads the pitchers page for a game, then one xml file per pitcher id.
# The pitchers page is always rewritten; a pitcher file that is already
# present on disk is left untouched and not fetched again.
#
# @param gid game id used to resolve the local game path and to fetch data
def download_pitchers_for_game(gid)
  gid_path = get_gid_path(gid)
  write_file("#{gid_path}/pitchers.html", GamedayFetcher.fetch_pitchers_page(gid))

  pitcher_path = "#{gid_path}/pitchers"
  Pitcher.get_all_ids_for_game(gid).each do |id|
    pitcher_file = "#{pitcher_path}/#{id}.xml"
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    next if File.exist?(pitcher_file)

    write_file(pitcher_file, GamedayFetcher.fetch_pitcher(gid, id))
  end
end
#download_xml_for_date(year, month, day) ⇒ Object
140 141 142 143 144 145 |
# File 'lib/data_downloader.rb', line 140
# Downloads the day-level xml files for a date: epg.xml,
# master_scoreboard.xml and media/highlights.xml. All three are written
# unconditionally under the path returned by get_day_path.
#
# @param year  year of the date; forwarded unchanged
# @param month month of the date; forwarded unchanged
# @param day   day of the date; forwarded unchanged
def download_xml_for_date(year, month, day)
  day_dir = get_day_path(year, month, day)
  # path relative to the day directory => GamedayFetcher class method
  fetchers = {
    'epg.xml' => :fetch_epg,
    'master_scoreboard.xml' => :fetch_scoreboard,
    'media/highlights.xml' => :fetch_day_highlights
  }
  # map + last keeps the method's value equal to the final write_file result.
  fetchers.map do |relative_path, fetcher|
    write_file("#{day_dir}/#{relative_path}", GamedayFetcher.public_send(fetcher, year, month, day))
  end.last
end
#download_xml_for_game(gid) ⇒ Object
Downloads the top-level xml files for the game specified by the passed-in game id. The files include:
bench.xml
benchO.xml
boxscore.xml
emailSource.xml
eventLog.xml
game.xml
game_events.xml
gamecenter.xml
gameday_Syn.xml
linescore.xml
miniscoreboard.xml
players.xml
plays.xml
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/data_downloader.rb', line 122
# Downloads the top-level xml files for the game specified by the passed-in
# game id. Every file in the table below is fetched and written
# unconditionally, in the order listed.
#
# @param gid game id used to resolve the local game path and to fetch data
def download_xml_for_game(gid)
  game_dir = get_gid_path(gid)
  # file name => GamedayFetcher class method that retrieves its contents
  fetchers = {
    'bench.xml' => :fetch_bench,
    'benchO.xml' => :fetch_bencho,
    'boxscore.xml' => :fetch_boxscore,
    'emailSource.xml' => :fetch_emailsource,
    'eventLog.xml' => :fetch_eventlog,
    'game.xml' => :fetch_game_xml,
    'game_events.xml' => :fetch_game_events,
    'gamecenter.xml' => :fetch_gamecenter_xml,
    'gameday_Syn.xml' => :fetch_gamedaysyn,
    'linescore.xml' => :fetch_linescore,
    'miniscoreboard.xml' => :fetch_miniscoreboard,
    'players.xml' => :fetch_players,
    'plays.xml' => :fetch_plays
  }
  # map + last keeps the method's value equal to the final write_file result.
  fetchers.map do |file_name, fetcher|
    write_file("#{game_dir}/#{file_name}", GamedayFetcher.public_send(fetcher, gid))
  end.last
end
#tmp_fetch_pages_for_game(gid) ⇒ Object
198 199 200 201 |
# File 'lib/data_downloader.rb', line 198
# Fetches and writes only the batters.html and pitchers.html pages for a
# game. Both pages are written unconditionally.
#
# @param gid game id used to resolve the local game path and to fetch pages
def tmp_fetch_pages_for_game(gid)
  # page file name => GamedayFetcher class method that retrieves it
  pages = {
    'batters.html' => :fetch_batters_page,
    'pitchers.html' => :fetch_pitchers_page
  }
  # map + last keeps the method's value equal to the final write_file result.
  pages.map do |file_name, fetcher|
    write_file("#{get_gid_path(gid)}/#{file_name}", GamedayFetcher.public_send(fetcher, gid))
  end.last
end
#tmp_fetch_pages_for_month(year, month, end_day) ⇒ Object
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/data_downloader.rb', line 179
# For each day from the first of the month through end_day (or through the
# last day of the month when end_day is nil or false), prints the day, writes
# that day's games.html and fetches the batters/pitchers pages of every game
# found on that day.
#
# @param year    year; converted with to_i to build the dates, passed through
#   as given to the helpers
# @param month   month; converted with to_i to build the dates, passed
#   through as given to the helpers
# @param end_day last day to process (converted with to_i); when nil or
#   false the whole month is processed
def tmp_fetch_pages_for_month(year, month, end_day)
  first_of_month = Date.new(year.to_i, month.to_i)
  last_date =
    if end_day
      Date.new(year.to_i, month.to_i, end_day.to_i)
    else
      (first_of_month >> 1) - 1 # one month ahead minus a day: last day of month
    end

  (first_of_month..last_date).each do |date|
    day = date.day.to_s
    puts day

    games_page_file = "#{get_day_path(year, month, day)}/games.html"
    write_file(games_page_file, GamedayFetcher.fetch_games_page(year, month, day))

    Game.find_by_date(year, month, day).each { |game| tmp_fetch_pages_for_game(game.gid) }
  end
end