Class: Worldfootball::Metal

Inherits:
Object
  • Object
show all
Defined in:
lib/worldfootball/download.rb

Overview

todo/check: put in Downloader namespace/class - why? why not?

or use Metal - no "porcelain" downloaders / machinery

Constant Summary collapse

BASE_URL =
'https://www.weltfussball.de'

Class Method Summary collapse

Class Method Details

.download_page(url) ⇒ Object

get & record/save to cache



127
128
129
130
131
132
# File 'lib/worldfootball/download.rb', line 127

def self.download_page( url )  ## get & record/save to cache
  response = Webget.page( url )  ## fetch (and cache) html page (via HTTP GET)

  ## note: exit on get / fetch error - do NOT continue for now - why? why not?
  exit 1   if response.status.nok?    ## e.g.  HTTP status code != 200
end

.download_report(slug, cache: true) ⇒ Object



87
88
89
90
91
92
93
94
95
96
# File 'lib/worldfootball/download.rb', line 87

def self.download_report( slug, cache: true )
  url  = report_url( slug )

  ## check check first
  if cache && Webcache.cached?( url )
     puts "  reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
  else
    download_page( url )
  end
end

.download_reports_for_schedule(slug, cache: true) ⇒ Object

todo/check: rename to reports_for_schedule or such - why? why not?



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/worldfootball/download.rb', line 99

def self.download_reports_for_schedule( slug, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?

  page = Page::Schedule.from_cache( slug )
  matches = page.matches

  puts "matches - #{matches.size} rows:"
  pp matches[0]

  puts "#{page.generated_in_days_ago}  - #{page.generated}"

  ## todo/fix: restore sleep to old value at the end!!!!
  ## Webget.config.sleep = 8    ## fetch 7-8 pages/min

  matches.each_with_index do |match,i|
     est = (Webget.config.sleep * (matches.size-(i+1)))/60.0   # estimated time left

     puts "fetching #{i+1}/#{matches.size} (#{est} min(s)) - #{match[:round]} | #{match[:team1]} v #{match[:team2]}..."
     report_ref = match[:report_ref ]
     if report_ref
       download_report( report_ref, cache: cache )
     else
       puts "!! WARN: report ref missing for match:"
       pp match
     end
  end
end

.download_schedule(slug) ⇒ Object

https://www.weltfussball.de/alle_spiele/fra-ligue-2-2019-2020/

https://www.weltfussball.de/alle_spiele/ita-serie-b-2019-2020/
https://www.weltfussball.de/alle_spiele/rus-premier-liga-2019-2020/
https://www.weltfussball.de/alle_spiele/rus-1-division-2019-2020/
https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/


82
83
84
85
# File 'lib/worldfootball/download.rb', line 82

def self.download_schedule( slug )
  url = schedule_url( slug )
  download_page( url )
end

.download_team(slug, cache: true) ⇒ Object



56
57
58
59
60
61
62
63
64
65
# File 'lib/worldfootball/download.rb', line 56

def self.download_team( slug, cache: true )
  url  = team_url( slug )

  ## check check first
  if cache && Webcache.cached?( url )
     puts "  reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
  else
    download_page( url )
  end
end

.report_url(slug) ⇒ Object



44
# File 'lib/worldfootball/download.rb', line 44

def self.report_url( slug )    "#{BASE_URL}/spielbericht/#{slug}/"; end

.schedule_url(slug) ⇒ Object



43
# File 'lib/worldfootball/download.rb', line 43

def self.schedule_url( slug )  "#{BASE_URL}/alle_spiele/#{slug}/";  end

.team_url(slug) ⇒ Object



45
# File 'lib/worldfootball/download.rb', line 45

def self.team_url( slug )   "#{BASE_URL}/teams/#{slug}/"; end