Module: Worldfootball

Defined in:
lib/worldfootball.rb,
lib/worldfootball.rb,
lib/worldfootball/mods.rb,
lib/worldfootball/page.rb,
lib/worldfootball/build.rb,
lib/worldfootball/cache.rb,
lib/worldfootball/stages.rb,
lib/worldfootball/vacuum.rb,
lib/worldfootball/convert.rb,
lib/worldfootball/leagues.rb,
lib/worldfootball/version.rb,
lib/worldfootball/download.rb,
lib/worldfootball/page_team.rb,
lib/worldfootball/page_report.rb,
lib/worldfootball/page_schedule.rb,
lib/worldfootball/convert_reports.rb,
lib/worldfootball/build-parse_score.rb

Overview

todo - move generate to generate file!!!

Defined Under Namespace

Classes: Configuration, LeagueConfig, Metal, Page

Constant Summary collapse

MODS =
{}
SCORE_ERRORS =
{
  'ro.1' => {
    ## 2013/14
    '2013-07-29' => [ 'FC Brașov', 'Săgeata Năvodari', ['1-1 (0-0, 0-1)', '1-1 (0-0)']],
  },
  'gr.1' => {
    ## 2010/11
    '2010-11-24' => [ 'Ergotelis',    'Olympiakos Piräus', ['0-2 (0-0, 0-0, 0-0)', '0-2 (0-0)']],
    '2010-11-28' => [ 'Panserraikos', 'Aris Saloniki',     ['1-0 (1-0, 0-0, 0-0)', '1-0 (1-0)']],
  },
  'at.cup' => {
     ## 2023/24
     '2023-07-22' => [ 'SV Leobendorf', 'SV Horn', ['3-2 (2-0, 2-2, 3-2) n.V.', '3-2 (2-0, 2-2) n.V.']],
  },
}
MAX_HEADERS =
[
'Stage',    # 0
'Round',    # 1
'Date',     # 2
'Time',     # 3
'Timezone',  #4
'Team 1',    #5
'FT',      #6
'HT',      #7
'Team 2',   #8
'ET',
'P',
'Comments',    ## e.g. awarded, cancelled/canceled, etc.
'UTC']
MIN_HEADERS =

always keep even if all empty

[   ## always keep even if all empty
'Date',
'Team 1',
'FT',
'Team 2'
]
LEAGUES =
LeagueConfig.new
MAJOR =

todo: namespace inside version or something - why? why not??

0
MINOR =
3
PATCH =
1
VERSION =
[MAJOR,MINOR,PATCH].join('.')

Class Method Summary collapse

Class Method Details



12
13
14
# File 'lib/worldfootball/version.rb', line 12

# Returns a one-line banner with the gem version, the running Ruby
# (version, release date, platform) and the gem's root directory.
def self.banner
  ruby_info = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "worldfootball/#{VERSION} on #{ruby_info} in (#{root})"
end

.build(rows, season:, league:, stage: '') ⇒ Object

build “standard” match records from “raw” table rows

Raises:

  • (ArgumentError)


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/worldfootball/build.rb', line 8

# Build "standard" match records from "raw" table rows.
#
# For every row:
#   1. the round name is resolved - "<n>. Spieltag" becomes the matchday
#      number (kept as a string), everything else goes through map_round
#   2. a known score error (SCORE_ERRORS, keyed by league and date) is patched
#   3. team names are cleaned via norm_team and then mapped via MODS
#   4. the score string is split by parse_score
#   5. if a time is present, date/time are parsed via CET.strptime and
#      converted (via utc) to the zone returned by find_zone!
#
# Progress is printed to stdout; the process exits (exit 1) if a
# "Spieltag" round has no parsable matchday number.
#
# @param rows [Array<Hash>] raw rows; reads :round, :date, :time, :team1, :team2, :score
# @param season  anything accepted by Season()
# @param league [String] league key e.g. 'at.1' (NOT a league struct)
# @param stage [String] stage name, copied as-is into every record
# @return [Array<Array>] one record per row with the columns
#    stage, round, date, time, timezone, team 1, ft, ht, team 2, et, pen, comments, utc
# @raise [ArgumentError] if league is not a String
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
   season = Season( season )  ## cast (ensure) season class (NOT string, integer, etc.)

   ## note: do NOT pass in league struct! pass in key (string)
   raise ArgumentError, "league key as string expected"  unless league.is_a?(String)

   print "  #{rows.size} row(s) - Worldfootball.build #{league} #{season}"
   print " - #{stage}" unless stage.empty?
   print "\n"


   zone = find_zone!( league: league, season: season )


   ## note: use only first part from key for lookup
   ##    e.g. at.1  => at
   ##         eng.1 => eng
   ##     and so on
   mods = MODS[ league.split('.')[0] ] || {}

   score_errors = SCORE_ERRORS[ league ] || {}


   i = 0
   recs = []
   rows.each do |row|
     ## NOTE(review): i is already 1-based after this increment, yet the
     ##   log lines below print (i+1) - the first row shows up as [002];
     ##   confirm if intended
     i += 1


  if row[:round] =~ /Spieltag/
    puts
    print '[%03d] ' % (i+1)
    print row[:round]

    if (m = row[:round].match( /^(?<num>[0-9]+)\. Spieltag$/ ))
      ## todo/check: always use a string even if number (as a string eg. '1' etc.)
      round = m[:num]  ## note: keep as string (NOT number)
      print " => #{round}"
    else
      puts "!! ERROR: cannot find matchday number in >#{row[:round]}<:"
      pp row
      exit 1
    end
    print "\n"

  ## note - must start line e.g.
  ##            do NOT match => Qual. 1. Runde  (1. Runde)!!!
  else
    puts
    print '[%03d] ' % (i+1)
    print row[:round]

    round_new = map_round( row[:round], league: league, season: season )

    if round_new
      round = round_new
      print " => #{round}"
      print "\n"
    else
      ## no mapping found - keep the original round name (warn only)
      round = row[:round]
      puts "!! WARN: unknown round >#{row[:round]}< for league >#{league} #{season}<:"
      pp row
    end
  end


    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]



    ### check for score_error; first (step 1) lookup by date
    ##   entry layout:  [team1, team2, [broken_score, fixed_score]]
    score_error = score_errors[ date_str ]
    if score_error
      if team1_str == score_error[0] &&
         team2_str == score_error[1]
         ## check if team names match too; if yes, apply fix/patch!!
         if score_str != score_error[2][0]
           puts "!! WARN - score fix changed? - expected #{score_error[2][0]}, got #{score_str} - fixing to #{score_error[2][1]}"
           pp row
         end
         puts "FIX - applying score error fix - from #{score_error[2][0]} to => #{score_error[2][1]}"
         score_str = score_error[2][1]
      end
    end


    print '[%03d]    ' % (i+1)
    print "%-10s | " % date_str
    print "%-5s | "  % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"



    ## clean team name (e.g. remove (old))
    ##   and asciify (e.g. ’ to ' )
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )

    team1_str = mods[ team1_str ]   if mods[ team1_str ]
    team2_str = mods[ team2_str ]   if mods[ team2_str ]


    ht, ft, et, pen, comments = parse_score( score_str )


   ###################
   ### calculate date & times
   ## convert date from string e.g. 2019-25-10
   ## date = Date.strptime( date_str, '%Y-%m-%d' )

   if time_str.nil? || time_str.empty?
       ## no time
       ##   assume  00:00:00T
       time_str     = ''
       timezone     = ''
       utc          = ''
   else
      ## note - assume central european (summer) time (cet/cest) - UTC+1 or UTC+2
      cet = CET.strptime( "#{date_str} #{time_str}", '%Y-%m-%d %H:%M' )

      utc = cet.getutc   ## convert to utc
      local =  zone.to_local( utc )  # convert to local via utc
      ## overwrite old with local
      date_str = local.strftime( '%Y-%m-%d' )
      time_str = local.strftime( '%H:%M' )

      ## pretty print timezone
      ###   todo/fix - bundle into fmt_timezone method or such for reuse
      tz_abbr   =  local.strftime( '%Z' )   ## e.g. EEST or if not available +03 or such
      tz_offset =  local.strftime( '%z' )   ##  e.g. +0300

      timezone =  if tz_abbr =~ /^[+-][0-9]+$/   ## only digits (no abbrev.)
                     tz_offset
                  else
                      "#{tz_abbr}/#{tz_offset}"
                  end

      utc      = utc.strftime( '%Y-%m-%dT%H:%MZ' )
   end


    ## note: column order matches MAX_HEADERS
    recs <<  [stage,
              round,
              date_str,
              time_str,
              timezone,
              team1_str,
              ft,
              ht,
              team2_str,
              et,              # extra: incl. extra time
              pen,             # extra: incl. penalties
              comments,
              utc]
   end  # each row
   recs
end

.config ⇒ Object



76
# File 'lib/worldfootball.rb', line 76

# Returns the (lazily created, memoized) Configuration instance.
def self.config
  @config ||= Configuration.new
end

.configure {|config| ... } ⇒ Object

lets you use

Worldfootball.configure do |config|
   config.convert.out_dir = './o'
end

Yields:



75
# File 'lib/worldfootball.rb', line 75

# Yields the configuration (see config) to the passed-in block
# and returns the block's result.
def self.configure
  yield config
end

.convert(league:, season:, overwrite: true) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/worldfootball/convert.rb', line 5

# Convert all cached schedule pages of a league season into one
# matches .csv file.
#
# Steps:
#   1. look up the league and its pages (slug/stage pairs) for the season
#   2. for every page: read it from the cache, collect team stats by ref
#      and build match records via build
#   3. for int'l club tournaments (see clubs_intl): download/read every
#      team page and append the (fifa) country code to the team names
#   4. reformat the date column (e.g. Sat Aug 7 1993), strip unused
#      columns via vacuum and write the records to
#      <out_dir>/<season>/<league>.csv plus a copy in <out_dir>/tmp/
#
# Note: exits the process (exit 1) if no country is found for a team.
#
# @param league  league code, resolved via find_league!
# @param season  anything accepted by Season()
# @param overwrite [Boolean] if false, skip if the output file exists already
# @return [nil] if skipped; otherwise the result of the last write_csv call
def self.convert( league:, season:,
                  overwrite: true )
  season = Season( season )  ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league!( league )
  pages  = league.pages!( season: season )


  ## NOTE(review): season.path is used here but season.to_path for
  ##   out_path2 below (and in generate) - presumably aliases; confirm
  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"
  if !overwrite && File.exist?( out_path )
    ## skip generation
    puts "  OK #{league.key} #{season}   (do NOT overwrite)"
    return
  end



    ## collect all teams
    teams_by_ref = {}

    recs = []
    pages.each do |slug, stage|
      ## note: stage might be nil
      ## todo/fix: report error/check if stage is nil!!!
      stage ||= ''

      ## try to map stage name if new name defined/found
      unless stage.empty?
         stage_new  =  map_stage( stage, league: league.key,
                                         season: season )
         stage = stage_new  if stage_new
      end


      print "  parsing #{slug}..."

      # unless File.exist?( path )
      #  puts "!! WARN - missing stage >#{stage_name}< source - >#{path}<"
      #  next
      # end

      page = Page::Schedule.from_cache( slug )
      print "  title=>#{page.title}<..."
      print "\n"

      rows = page.matches

      teams = page.teams
      ## e.g. {:count=>2, :name=>"AS Arta", :ref=>"as-arta"},
      ##      {:count=>4, :name=>"Dekedaha FC", :ref=>"dekedaha-fc"},
      ##        ...
      teams.each do |h|
          team_count = h[:count]
          team_name  = norm_team( h[:name] )      ## note: norm team name!!!
          team_ref   = h[:ref]

###
##   quick fix for broken refs/links
##    olympique-lyon => olympique-lyonnais
       #   team_ref = 'olympique-lyonnais'   if team_ref == 'olympique-lyon'

          ## note: skip N.N.  (place holder team)
          ##        team_ref is nil etc.
          next if team_name == 'N.N.'

          ## accumulate match count and all (normalized) names seen per team ref
          team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
                                                     names:  [] }
          team_stat[:count] += team_count
          team_stat[:names] << team_name   unless team_stat[:names].include?( team_name )
      end


      stage_recs = build( rows,
                          season: season,
                          league: league.key,
                          stage: stage )

      pp stage_recs[0]   ## check first record
      recs += stage_recs
    end


    clubs_intl  =  ['uefa.cl', 'uefa.el', 'uefa.conf',
                    'uefa.cl.q', 'uefa.el.q', 'uefa.conf.q',
                     'copa.l',
                    'concacaf.cl',
                    'caf.cl',
                    'afl',
                  ].include?(league.key) ? true : false

    ####
    #   auto-add (fifa) country code if int'l club tournament
    if clubs_intl
 ##
 ##   get country codes for team ref
       teams_by_ref.each do |team_slug, h|
          Metal.download_team( team_slug, cache: true )
          team_page = Page::Team.from_cache( team_slug )
          props = team_page.props
          pp props
          country_name = props[:country]
          cty = Fifa.world.find_by_name( country_name )
          if cty.nil?
            puts "!! ERROR - no country found for #{country_name}"
            exit 1
          end
          h[:code] = cty.code
       end

       ## generate lookup by name
       teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
  ### todo/fix
  ##    report warning if names size is > 1!!!!
  ##
             rec[:names].each do |name|
                h[ name ] = rec
              end
              h
       end


    #####
    ## dump team refs
    puts "  #{teams_by_ref.size} team(s) by ref:"
    pp teams_by_ref

    ## quick hack
    ##  add country (fifa) codes to team names
    ##   note: rec[5] is team 1 and rec[8] is team 2 (see record layout in build)
    ## NOTE(review): teams_by_name is keyed by norm_team names only, while
    ##   build also applies the MODS mapping - a mapped name would presumably
    ##   give a nil lookup (NoMethodError) here; verify
        recs.each do |rec|
           team1_org  =  rec[5]
           if team1_org != 'N.N.'   ## note - skip place holder; keep as-is
             country_code = teams_by_name[team1_org][:code]
             rec[5]  = "#{team1_org} (#{country_code})"
           end

           team2_org = rec[8]
           if team2_org != 'N.N.'   ## note - skip place holder; keep as-is
             country_code = teams_by_name[team2_org][:code]
             rec[8]  = "#{team2_org} (#{country_code})"
           end
        end
    end


##   note:  sort matches by date before saving/writing!!!!
##     note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
## note: assume date is third column!!! (stage/round/date/...)

### note - do NOT sort for now
##    keep "original" page order - why? why not?
## recs = recs.sort { |l,r| l[2] <=> r[2] }


## reformat date / beautify e.g. Sat Aug 7 1993
recs.each do |rec|
            if rec[2]
              if rec[2] =~ /^\d{4}-\d{1,2}-\d{1,2}$/
               rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
              else
                ## report unknown date format warning
                puts "WARN - unsupported date format (cannot parse?) >#{rec[2]}<"
              end
            end
       end

   ## remove unused columns (e.g. stage, et, p, etc.)
   recs, headers = vacuum( recs )

   puts headers
   pp recs[0]   ## check first record

   puts "   writing to >#{out_path}< - #{recs.size} record(s)..."
   write_csv( out_path, recs, headers: headers )

   ## add to tmp too for debugging
   out_path2 = "#{config.convert.out_dir}/tmp/#{league.key}/#{season.to_path}.csv"
   puts "   writing to >#{out_path2}< - #{recs.size} record(s)..."
   write_csv( out_path2, recs, headers: headers )
end

.convert_reports(league:, season:) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/worldfootball/convert_reports.rb', line 4

# Convert the cached match reports of a league season into one
# goals .csv file (<out_dir>/<season>/<league>~goals.csv).
#
# For every match with a report ref the report page is read from the
# cache and one record per goal is collected with the columns
# Match, Score, Minute, Extra, Player, Notes.  Matches without a
# report ref are skipped with a warning.
#
# NOTE(review): unlike convert/generate this method uses find_league
#   and league.pages (no bang), expects page entries that respond to
#   [:slug] and writes via Cache::CsvMatchWriter - none of these are
#   visible here; confirm they (still) exist.
#
# @param league  league code
# @param season  anything accepted by Season()
def self.convert_reports( league:, season: )
  season = Season( season )  ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

   ## note: use only first part from key for lookup
   ##    e.g. at.1  => at
   ##         eng.1 => eng
   ##     and so on
   mods = MODS[ league.key.split('.')[0] ] || {}



  pages = league.pages( season: season )

  recs = []

  ## if single (simple) page setup - wrap in array
  pages = pages.is_a?(Array) ? pages : [pages]
  pages.each do |page_meta|  # note: use page_info for now (or page_rec or page_meta or such)

    page = Page::Schedule.from_cache( page_meta[:slug] )
    print "  page title=>#{page.title}<..."
    print "\n"

    matches = page.matches

    puts "matches - #{matches.size} rows:"
    pp matches[0]

    puts "#{page.generated_in_days_ago}  - #{page.generated}"


    matches.each_with_index do |match,i|

      report_ref = match[:report_ref]
      if report_ref.nil?
        puts "!! WARN: no match report ref found for match:"
        pp match
        next
      end

      puts "reading #{i+1}/#{matches.size} - #{report_ref}..."
      report = Page::Report.from_cache( report_ref )

      puts
      puts report.title
      puts report.generated

      rows = report.goals
      puts "goals - #{rows.size} records"
      ## pp rows


      if rows.size > 0
        ## add goals
        date = Date.strptime( match[:date], '%Y-%m-%d')

        team1 = match[:team1]
        team2 = match[:team2]

        ## clean team name (e.g. remove (old))
        ##   and asciify (e.g. ’ to ' )
        team1 = norm_team( team1 )
        team2 = norm_team( team2 )

        team1 = mods[ team1 ]   if mods[ team1 ]
        team2 = mods[ team2 ]   if mods[ team2 ]

        ## match id e.g. <team1> - <team2> | Aug 7 1993
        match_id = "#{team1} - #{team2} | #{date.strftime('%b %-d %Y')}"


        rows.each do |row|
          extra = if row[:owngoal]
                   '(og)'  ## or use OG or O.G.- why? why not?
                  elsif row[:penalty]
                   '(pen)' ## or use P or PEN - why? why not?
                  else
                    ''
                  end

          rec = [match_id,
                row[:score],
                "#{row[:minute]}'",
                extra,
                row[:player],
                row[:notes]]
          recs << rec
        end
      end
     end #  each match
    end # each page

  ## pp recs

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}~goals.csv"

  headers  = ['Match', 'Score', 'Minute', 'Extra', 'Player', 'Notes']

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end

.debug=(value) ⇒ Object

add a global debug flag



41
# File 'lib/worldfootball.rb', line 41

# Sets the global debug flag.
def self.debug=( value )
  @debug = value
end

.debug? ⇒ Boolean

note: default is FALSE

Returns:

  • (Boolean)


42
# File 'lib/worldfootball.rb', line 42

# Returns the global debug flag; false if never set.
def self.debug?
  @debug ||= false
end

.find_league!(league_code) ⇒ Object

(strict) lookup convenience helpers with error reporting

AND abort if no lookup found


155
156
157
158
159
160
161
162
163
164
# File 'lib/worldfootball/leagues.rb', line 155

# (Strict) league lookup by code in LEAGUES.
#
# Prints an error (incl. all known league keys) and exits the
# process (exit 1) if no config is found.
#
# @param league_code  lookup key e.g. 'at.1'
# @return the league config found
def self.find_league!( league_code )
  found = LEAGUES[ league_code ]
  return found   unless found.nil?

  puts "!! ERROR - no config found for #{league_code}; leagues incl:"
  puts LEAGUES.keys.join( ', ' )
  puts "  #{LEAGUES.size} leagues(s)"
  exit 1
end

.find_league_pages!(league:, season:) ⇒ Object



166
167
168
169
170
# File 'lib/worldfootball/leagues.rb', line 166

# (Strict) lookup of the pages of a league for a season;
# a shortcut for find_league! followed by pages!.
def self.find_league_pages!( league:, season: )
  find_league!( league ).pages!( season: season )
end

.generate(league:, season:, overwrite: true) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/worldfootball.rb', line 85

# Generate the Football.TXT file for a league season from the
# matches .csv written by convert.
#
# Seasons before 2000 are written to an archive/<decade>s/ folder.
# A copy is always written to <out_dir>/tmp/ for debugging.
#
# @param league  league code, resolved via find_league!
# @param season  anything accepted by Season()
# @param overwrite [Boolean] if false, skip if the output file exists already
def self.generate( league:, season:,
                   overwrite: true )
  season = Season( season )   ## cast (ensure) season class
  league = find_league!( league )
  league.pages!( season: season )   ## (strict) pages lookup only; result not used here

  ## use archive-style before 2000!!!
  archive_dir = if season >= Season( '2000' )
                  ''
                else
                  start_year = season.start_year
                  "archive/#{start_year - (start_year%10)}s/"
                end
  out_path = "#{config.generate.out_dir}/#{archive_dir}#{season.to_path}/#{league.key}.txt"

  ## skip generation if output exists already (and no overwrite)
  if !overwrite && File.exist?( out_path )
    puts "  OK #{league.key} #{season}   (do NOT overwrite)"
    return
  end

  ## get matches
  csv_path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
  puts "  ---> reading matches in #{csv_path} ..."
  matches = SportDb::CsvMatchParser.read( csv_path )
  puts "     #{matches.size} matches"

  ## build
  txt = SportDb::TxtMatchWriter.build( matches )
  puts txt

  ## note - use league key for league name for now!!
  heading = "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
  buf = String.new
  buf << heading << txt

  puts "   writing to >#{out_path}<..."
  write_text( out_path, buf )

  ## add to tmp too for debugging
  tmp_path = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
  puts "   writing to >#{tmp_path}<..."
  write_text( tmp_path, buf )
end

.list_pages ⇒ Object

todo/check - rename to/use list_cached_pages



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/worldfootball/cache.rb', line 5

# List all cached schedule pages (alle_spiele/*.html in the web cache)
# and print for every page the league and season found via
# Worldfootball.find_page (or ?? if the page is unknown).
#
# Prints the page count (before and after the listing) and the
# elapsed time to stdout.
def self.list_pages     ## todo/check - rename to/use list_cached_pages
    start_time = Time.now   ## todo: use Timer? t = Timer.start / stop / diff etc. - why? why not?

    # pages = Dir.glob( './dl/at*' )
    pages = Dir.glob( "#{Webcache.root}/www.weltfussball.de/alle_spiele/*.html" )
    puts "  #{pages.size} page(s)"   #=> 576 pages
    puts

    pages.each do |path|
       ## page slug is the file name without the extension
       basename = File.basename( path, File.extname( path ) )
       print "%-50s" % basename
       print " => "

       page = Worldfootball.find_page( basename )
       if page
         print "    "
         print "%-12s"    % page[:league]
         print "| %-10s"  % page[:season]
         print "\n"
       else
         print "??"
         print "\n"
       end
    end

    puts "  #{pages.size} page(s)"   #=> 576 pages
    puts

    diff_time = Time.now - start_time
    ## note: was (wrongly) labelled convert_all
    puts "list_pages: done in #{diff_time} sec(s)"
end

.log(msg) ⇒ Object

append to log



44
45
46
47
48
49
# File 'lib/worldfootball.rb', line 44

# Append a message (plus a newline) to ./logs.txt (utf-8).
def self.log( msg )
  File.open( './logs.txt', 'a:utf-8' ) { |file|
    file.write( msg )
    file.write( "\n" )
  }
end

.map_round(round, league:, season:) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/worldfootball/stages.rb', line 27

# Map a round name to its new name via the config/rounds.csv table
# (read once and cached; columns used: key, name1, name2).
#
# The league-specific section is tried first, then the generic (*)
# section.  A missing section (incl. a missing generic section) is
# treated as "no mapping" instead of raising.
#
# @param round [String] round name as found on the page
# @param league  league key; compared as a downcased string
# @param season  not used for the lookup (for now)
# @return [String, nil] the new name or nil if no mapping is found
def self.map_round( round, league:, season: )
    @rounds ||= begin
                    rounds = {}
                    recs = read_csv( "#{Worldfootball.root}/config/rounds.csv" )
                    recs.each do |rec|
                       rounds[ rec['key'] ] ||= {}
                       rounds[ rec['key'] ][ rec['name1'] ] = rec['name2']
                    end
                    rounds
                end

    league_code = league.to_s.downcase

    ## note: dig returns nil if the section (league or *) is missing
    @rounds.dig( league_code, round ) || @rounds.dig( '*', round )
end

.map_stage(stage, league:, season:) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/worldfootball/stages.rb', line 5

# Map a stage name to its new name via the config/stages.csv table
# (read once and cached; columns used: key, name1, name2).
#
# The league-specific section is tried first, then the generic (*)
# section.  A missing section (incl. a missing generic section) is
# treated as "no mapping" instead of raising.
#
# @param stage [String] stage name as configured / found
# @param league  league key; compared as a downcased string
# @param season  not used for the lookup (for now)
# @return [String, nil] the new name or nil if no mapping is found
def self.map_stage( stage, league:, season: )
   @stages ||= begin
                   stages = {}
                   recs = read_csv( "#{Worldfootball.root}/config/stages.csv" )
                   recs.each do |rec|
                      stages[ rec['key'] ] ||= {}
                      stages[ rec['key'] ][ rec['name1'] ] = rec['name2']
                   end
                   stages
               end

   league_code = league.to_s.downcase

   ## note: dig returns nil if the section (league or *) is missing
   @stages.dig( league_code, stage ) || @stages.dig( '*', stage )
end

.norm_team(team) ⇒ Object

“global” helpers



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/worldfootball/mods.rb', line 10

# Clean (normalize) a team name.  In order:
#
#   1. remove the (old) marker and strip leading/trailing spaces
#   2. asciify apostrophe look-alikes e.g.
#        Hawke’s Bay United FC  =>  Hawke's Bay United FC
#   3. replace an inline dash ( - ) with a single space e.g.
#        Criciúma - SC          =>  Criciúma SC
#   4. replace (A) with II e.g.
#        Austria Wien (A)       =>  Austria Wien II
#   5. remove the (first) pair of parentheses - reserved for country codes - e.g.
#        Lloyds FC (Sittingbourne)  =>  Lloyds FC Sittingbourne
#   6. strip the ", a.s." suffix e.g.
#        MFK Frýdek-Místek, a.s.    =>  MFK Frýdek-Místek
#   7. map known place holder names to N.N.
#
# @param team [String] raw team name
# @return [String] normalized team name
def self.norm_team( team )
  name = team.sub( '(old)', '' ).strip
             .gsub( /[’´`]/, "'" )
             .gsub( /[ ]+[-][ ]+/, ' ' )
             .sub( /[ ]+\(A\)/, ' II' )
             .sub( /\(
                        ([^)]+?)   ## eat-up all non-greed to next )
                     \)/x, '\1' )
             .sub( ', a.s.', '' )

  ## quick hack - norm(alize) place holder team names to N.N.
  placeholders = ['Sieger HF 1',
                  'Sieger HF 2']
  placeholders.include?( name ) ? 'N.N.' : name
end

.parse_score(score_str) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/worldfootball/build-parse_score.rb', line 16

# Split a raw (german-language) score string into its parts.
#
# Supported are plain scores (1-0), scores with half time (2-1 (1-0)),
# after extra time (n.V.), on penalties (i.E. / n.P.), awarded
# matches (Wert.) and the status markers
#   ---  n.gesp.  ausg.  annull.  Aufg.  abgebr.  verl.  WO
# Some broken variants get auto-fixed/patched (with a warning).
#
# Note: prints an error and exits the process (exit 1) if the
#       score format is not supported.
#
# @param score_str [String] raw score e.g. 5-4 (0-0, 1-1, 2-2) i.E.
# @return [Array<String>] ht, ft, et, pen, comments - '' if not available
def self.parse_score( score_str )
  ## all parts start out empty; every branch only sets what it knows
  ht       = ''
  ft       = ''
  et       = ''
  pen      = ''
  comments = String.new     ## check - rename to/use status or such - why? why not?

  case score_str
  when '---'
    ## in the future (no score yet) - was -:-
  when 'n.gesp.', 'ausg.', 'annull.', 'Aufg.'
    ## cancelled (british) / canceled (us)
    ##   todo/check: change ausg. / annull. to some other status ????
    ft       = '(*)'
    comments = 'cancelled'
  when 'abgebr.'
    ## abandoned  -- waiting for replay?
    ft       = '(*)'
    comments = 'abandoned'
  when 'verl.'
    comments = 'postponed'
  when 'WO'
    ## W.O. or w/o (originally two words: "walk over")
    ft       = '(*)'
    comments = 'w/o'   ## use walkover - why? why not?
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*,[ ]*
                        ([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*,[ ]*
                       ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
                          [ ]*
                       i\.E\.
                     /x
    ## e.g. 5-4 (0-0, 1-1, 2-2) i.E.
    pen, ht, ft, et = "#{$1}-#{$2}", "#{$3}-#{$4}", "#{$5}-#{$6}", "#{$7}-#{$8}"
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*,[ ]*
                       ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
                          [ ]*
                       i\.E\.
                     /x
    ## e.g. 3-2 (0-0, 1-1) i.E.   - note: no extra time!!! only ht,ft!!!
    ##                              "popular" in southamerica & mexico
    pen, ht, ft = "#{$1}-#{$2}", "#{$3}-#{$4}", "#{$5}-#{$6}"
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                      [ ]*
                    \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                       [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                       [ ]*
                       n\.V\.
                     /x
    ## e.g. 2-1 (1-0, 1-1) n.V
    et, ht, ft = "#{$1}-#{$2}", "#{$3}-#{$4}", "#{$5}-#{$6}"
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                      [ ]*
                    \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                       [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                       [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                       [ ]*
                       n\.V\.
                     /x
    ### auto-patch fix drop last score
    ## 1-3 (0-1, 1-1, 0-2) n.V.  => 1-3 (0-1, 1-1) n.V.
    et, ht, ft = "#{$1}-#{$2}", "#{$3}-#{$4}", "#{$5}-#{$6}"

    puts "!! WARN - auto-fix/patch score - >#{score_str}<"
    ### todo/fix - log auto-patch/fix - for double checking!!!!!
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                     /x
    ## e.g. 2-1 (1-0)
    ft, ht = "#{$1}-#{$2}", "#{$3}-#{$4}"
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                          Wert\.    # ([a-z.]+)
                       /x
    ### assume awd. (awarded) always - why? why not?
    ft       = "#{$1}-#{$2} (*)"
    comments = 'awd.'
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
                         [ ]*
                       \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*,[ ]*
                        ([0-9]+) [ ]*-[ ]* ([0-9]+)
                       \)
                        [ ]*
                        Wert\.    # ([a-z.]+)
                       /x
    ##  auto-fix/patch - drop last scores (only use ft)
    ##     e.g. 3-0 (0-0, 0-0) awd.
    ft       = "#{$1}-#{$2} (*)"
    comments = 'awd.'
    ## (auto) log case for double checking - why? why not?
  when /^([0-9]+)-([0-9]+)$/
    ft = "#{$1}-#{$2}"     ## e.g. see luxemburg and others
  when /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*,[ ]*
                        ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)$
                     /x
    ## auto-fix/patch
    # 3-3 (0-3, 3-3)  =>  3-3 (0-3) - drop last score
    ft, ht = "#{$1}-#{$2}", "#{$3}-#{$4}"

    puts "!! WARN - auto-fix/patch score - >#{score_str}<"
    ### todo/fix - log auto-patch/fix - for double checking!!!!!
  when /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                         [ ]*
                       n\.V\.
                      $/x
    et = "#{$1}-#{$2}"
    puts "!! WARN - weird score n.V. only - >#{score_str}<"
  when /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                          [ ]*
                        (?: i\.E\. | n\.P\. )
                       $/x
    pen = "#{$1}-#{$2}"
    puts "!! WARN - weird score i.E. (n.P.) only - >#{score_str}<"
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end

.reports(league:, season:, cache: true) ⇒ Object

todo/check: rename to reports_for_schedule or such - why? why not?



21
22
23
24
25
26
27
28
# File 'lib/worldfootball/download.rb', line 21

# Download the match reports for all schedule pages of a league season.
#
# @param league  league code, resolved via find_league_pages!
# @param season  anything accepted by Season()
# @param cache [Boolean] passed on to Metal.download_reports_for_schedule
def self.reports( league:, season:, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
  season = Season( season )   ## cast (ensure) season class

  find_league_pages!( league: league, season: season ).each do |slug, _stage|
    Metal.download_reports_for_schedule( slug, cache: cache )
  end
end

.root ⇒ Object



16
17
18
# File 'lib/worldfootball/version.rb', line 16

# Returns the (expanded) path three directory levels up from this file.
def self.root
  dir = __FILE__
  3.times { dir = File.dirname( dir ) }
  File.expand_path( dir )
end

.schedule(league:, season:, overwrite: true) ⇒ Object

porcelain “api”



7
8
9
10
11
12
13
14
15
16
17
18
# File 'lib/worldfootball/download.rb', line 7

# Download all schedule pages of a league season.
#
# @param league  league code, resolved via find_league_pages!
# @param season  anything accepted by Season()
# @param overwrite [Boolean] if false, pages found in the web cache
#                            are skipped (NOT downloaded again)
def self.schedule( league:, season:, overwrite: true )
  season = Season( season )   ## cast (ensure) season class

  find_league_pages!( league: league, season: season ).each do |slug, _stage|
    ## note: only check the cache if overwrite is off
    if overwrite || !Webcache.cached?( Metal.schedule_url( slug ))
      Metal.download_schedule( slug )
    else
      puts "  OK #{league} #{season}  - #{slug}   (do NOT overwrite)"
    end
  end
end

.vacuum(rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/worldfootball/vacuum.rb', line 26

# Check for unused (all-empty) columns and strip/remove them.
#
# A column is kept if at least one row has a non-empty value in it
# or if its header is listed in fixed_headers.
#
# Note: the per-column counts are printed (pp) for debugging.
#
# @param rows [Array<Array<String, nil>>] records; columns in headers order
# @param headers [Array<String>] header names for all columns of the rows
# @param fixed_headers [Array<String>] headers to always keep even if all empty
# @return [Array(Array<Array>, Array<String>)] the vacuumed rows and the kept headers
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## count the non-empty values per column
  ##   note: uses the passed-in headers (was hard-coded to MAX_HEADERS)
  counter = Array.new( headers.size, 0 )
  rows.each do |row|
     row.each_with_index do |col, idx|
       counter[idx] += 1  unless col.nil? || col.empty?
     end
  end

  pp counter

  ## split the columns into kept and empty (to be removed)
  kept_headers  = []
  empty_indices = []

  counter.each_with_index do |num, idx|
     header = headers[ idx ]
     if num > 0 || fixed_headers.include?( header )
       kept_headers << header
     else
       empty_indices << idx
     end
  end

  unless empty_indices.empty?
    rows = rows.map do |row|
             row.reject.with_index { |_col, idx| empty_indices.include?( idx ) }
           end
  end

  [rows, kept_headers]
end

.version ⇒ Object



8
9
10
# File 'lib/worldfootball/version.rb', line 8

# Returns the gem version string (see VERSION).
def self.version() VERSION; end