Module: Worldfootball
- Defined in:
- lib/worldfootball.rb,
lib/worldfootball.rb,
lib/worldfootball/mods.rb,
lib/worldfootball/page.rb,
lib/worldfootball/build.rb,
lib/worldfootball/cache.rb,
lib/worldfootball/stages.rb,
lib/worldfootball/vacuum.rb,
lib/worldfootball/convert.rb,
lib/worldfootball/leagues.rb,
lib/worldfootball/version.rb,
lib/worldfootball/download.rb,
lib/worldfootball/page_team.rb,
lib/worldfootball/page_report.rb,
lib/worldfootball/page_schedule.rb,
lib/worldfootball/convert_reports.rb,
lib/worldfootball/build-parse_score.rb
Overview
todo - move generate to generate file!!!
Defined Under Namespace
Classes: Configuration, LeagueConfig, Metal, Page
Constant Summary collapse
- MODS =
{}
- SCORE_ERRORS =
{ 'ro.1' => { ## 2013/14 '2013-07-29' => [ 'FC Brașov', 'Săgeata Năvodari', ['1-1 (0-0, 0-1)', '1-1 (0-0)']], }, 'gr.1' => { ## 2010/11 '2010-11-24' => [ 'Ergotelis', 'Olympiakos Piräus', ['0-2 (0-0, 0-0, 0-0)', '0-2 (0-0)']], '2010-11-28' => [ 'Panserraikos', 'Aris Saloniki', ['1-0 (1-0, 0-0, 0-0)', '1-0 (1-0)']], }, 'at.cup' => { ## 2023/24 '2023-07-22' => [ 'SV Leobendorf', 'SV Horn', ['3-2 (2-0, 2-2, 3-2) n.V.', '3-2 (2-0, 2-2) n.V.']], }, }
- MAX_HEADERS =
[ 'Stage', # 0 'Round', # 1 'Date', # 2 'Time', # 3 'Timezone', #4 'Team 1', #5 'FT', #6 'HT', #7 'Team 2', #8 'ET', 'P', 'Comments', ## e.g. awarded, cancelled/canceled, etc. 'UTC']
- MIN_HEADERS =
always keep even if all empty
[ ## always keep even if all empty 'Date', 'Team 1', 'FT', 'Team 2' ]
- LEAGUES =
LeagueConfig.new
- MAJOR =
todo: namespace inside version or something - why? why not??
0
- MINOR =
3
- PATCH =
1
- VERSION =
[MAJOR,MINOR,PATCH].join('.')
Class Method Summary collapse
- .banner ⇒ Object
-
.build(rows, season:, league:, stage: '') ⇒ Object
build “standard” match records from “raw” table rows.
- .config ⇒ Object
-
.configure {|config| ... } ⇒ Object
lets you use Worldfootball.configure do |config| config.convert.out_dir = './o' end (block-style configuration).
- .convert(league:, season:, overwrite: true) ⇒ Object
- .convert_reports(league:, season:) ⇒ Object
-
.debug=(value) ⇒ Object
add a global debug flag.
-
.debug? ⇒ Boolean
note: default is FALSE.
-
.find_league!(league_code) ⇒ Object
(strict) lookup convenience helpers with error reporting AND abort if no lookup found.
- .find_league_pages!(league:, season:) ⇒ Object
- .generate(league:, season:, overwrite: true) ⇒ Object
-
.list_pages ⇒ Object
todo/check - rename to/use list_cached_pages.
-
.log(msg) ⇒ Object
append to log.
- .map_round(round, league:, season:) ⇒ Object
- .map_stage(stage, league:, season:) ⇒ Object
-
.norm_team(team) ⇒ Object
“global” helpers.
- .parse_score(score_str) ⇒ Object
-
.reports(league:, season:, cache: true) ⇒ Object
todo/check: rename to reports_for_schedule or such - why? why not?.
- .root ⇒ Object
-
.schedule(league:, season:, overwrite: true) ⇒ Object
porcelain “api”.
- .vacuum(rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS) ⇒ Object
- .version ⇒ Object
Class Method Details
.banner ⇒ Object
12 13 14 |
# File 'lib/worldfootball/version.rb', line 12

## Builds the one-line banner identifying this library and the running Ruby.
##
## The text is assembled from VERSION, RUBY_VERSION, RUBY_RELEASE_DATE,
## RUBY_PLATFORM and the value returned by root.
##
## returns: the banner (String)
def self.banner
  "worldfootball/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
end
.build(rows, season:, league:, stage: '') ⇒ Object
build “standard” match records from “raw” table rows
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
# File 'lib/worldfootball/build.rb', line 8 def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such - why? why not? season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.) ## note: do NOT pass in league struct! pass in key (string) raise ArgumentError, "league key as string expected" unless league.is_a?(String) print " #{rows.size} row(s) - Worldfootball.build #{league} #{season}" print " - #{stage}" unless stage.empty? print "\n" zone = find_zone!( league: league, season: season ) ## note: use only first part from key for lookup ## e.g. at.1 => at ## eng.1 => eng ## and so on mods = MODS[ league.split('.')[0] ] || {} score_errors = SCORE_ERRORS[ league ] || {} i = 0 recs = [] rows.each do |row| i += 1 if row[:round] =~ /Spieltag/ puts print '[%03d] ' % (i+1) print row[:round] if (m = row[:round].match( /^(?<num>[0-9]+)\. Spieltag$/ )) ## todo/check: always use a string even if number (as a string eg. '1' etc.) round = m[:num] ## note: keep as string (NOT number) print " => #{round}" else puts "!! ERROR: cannot find matchday number in >#{row[:round]}<:" pp row exit 1 end print "\n" ## note - must start line e.g. ## do NOT match => Qual. 1. Runde (1. Runde)!!! else puts print '[%03d] ' % (i+1) print row[:round] round_new = map_round( row[:round], league: league, season: season ) if round_new round = round_new print " => #{round}" print "\n" else round = row[:round] puts "!! WARN: unknown round >#{row[:round]}< for league >#{league} #{season}<:" pp row end end date_str = row[:date] time_str = row[:time] team1_str = row[:team1] team2_str = row[:team2] score_str = row[:score] ### check for score_error; first (step 1) lookup by date score_error = score_errors[ date_str ] if score_error if team1_str == score_error[0] && team2_str == score_error[1] ## check if team names match too; if yes, apply fix/patch!! if score_str != score_error[2][0] puts "!! WARN - score fix changed? 
- expected #{score_error[2][0]}, got #{score_str} - fixing to #{score_error[2][1]}" pp row end puts "FIX - applying score error fix - from #{score_error[2][0]} to => #{score_error[2][1]}" score_str = score_error[2][1] end end print '[%03d] ' % (i+1) print "%-10s | " % date_str print "%-5s | " % time_str print "%-22s | " % team1_str print "%-22s | " % team2_str print score_str print "\n" ## clean team name (e.g. remove (old)) ## and asciify (e.g. ’ to ' ) team1_str = norm_team( team1_str ) team2_str = norm_team( team2_str ) team1_str = mods[ team1_str ] if mods[ team1_str ] team2_str = mods[ team2_str ] if mods[ team2_str ] ht, ft, et, pen, comments = parse_score( score_str ) ################### ### calculate date & times ## convert date from string e.g. 2019-25-10 ## date = Date.strptime( date_str, '%Y-%m-%d' ) if time_str.nil? || time_str.empty? ## no time ## assume 00:00:00T time_str = '' timezone = '' utc = '' else ## note - assume central european (summer) time (cet/cest) - UTC+1 or UTC+2 cet = CET.strptime( "#{date_str} #{time_str}", '%Y-%m-%d %H:%M' ) utc = cet.getutc ## convert to utc local = zone.to_local( utc ) # convert to local via utc ## overwrite old with local date_str = local.strftime( '%Y-%m-%d' ) time_str = local.strftime( '%H:%M' ) ## pretty print timezone ### todo/fix - bundle into fmt_timezone method or such for reuse tz_abbr = local.strftime( '%Z' ) ## e.g. EEST or if not available +03 or such tz_offset = local.strftime( '%z' ) ## e.g. +0300 timezone = if tz_abbr =~ /^[+-][0-9]+$/ ## only digits (no abbrev.) tz_offset else "#{tz_abbr}/#{tz_offset}" end utc = utc.strftime( '%Y-%m-%dT%H:%MZ' ) end recs << [stage, round, date_str, time_str, timezone, team1_str, ft, ht, team2_str, et, # extra: incl. extra time pen, # extra: incl. penalties comments, utc] end # each row recs end |
.config ⇒ Object
76 |
# File 'lib/worldfootball.rb', line 76

## Returns the module-wide configuration object; a Configuration is
## created on first access and reused afterwards.
def self.config
  @config ||= Configuration.new
end
.configure {|config| ... } ⇒ Object
lets you use
Worldfootball.configure do |config|
config.convert.out_dir = './o'
end
75 |
# File 'lib/worldfootball.rb', line 75

## Yields the value of config to the given block, e.g.
##   Worldfootball.configure do |config|
##     config.convert.out_dir = './o'
##   end
def self.configure
  yield config
end
.convert(league:, season:, overwrite: true) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# File 'lib/worldfootball/convert.rb', line 5

## Converts the cached schedule pages of one league season into a .csv
## datafile.
##
## league    - league code; resolved via find_league!
## season    - anything accepted by Season(); cast on entry
## overwrite - if false and the output file already exists, nothing is written
##
## Steps (as visible below): parse every schedule page from the cache,
## build match records per page, for a fixed list of int'l club
## tournaments append a (fifa) country code to each team name, reformat
## the date column, drop unused columns via vacuum and write the result
## twice (regular output path plus a tmp copy for debugging).
##
## The process is terminated via exit 1 if no country can be found for a team.
def self.convert( league:, season:, overwrite: true )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league!( league )
  pages  = league.pages!( season: season )

  ## NOTE(review): season.path is used here while out_path2 below (and
  ##   generate) use season.to_path - confirm both yield the same path
  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  if !overwrite && File.exist?( out_path )
    ## skip generation
    puts " OK #{league.key} #{season} (do NOT overwrite)"
    return
  end

  ## collect all teams
  teams_by_ref = {}

  recs = []
  pages.each do |slug, stage|
    ## note: stage might be nil
    ## todo/fix: report error/check if stage is nil!!!
    stage ||= ''

    ## try to map stage name if new name defined/found
    unless stage.empty?
      stage_new = map_stage( stage, league: league.key, season: season )
      stage = stage_new if stage_new
    end

    print " parsing #{slug}..."

    # unless File.exist?( path )
    #   puts "!! WARN - missing stage >#{stage_name}< source - >#{path}<"
    #   next
    # end

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    rows  = page.matches
    teams = page.teams

    ## e.g. {:count=>2, :name=>"AS Arta", :ref=>"as-arta"},
    ##      {:count=>4, :name=>"Dekedaha FC", :ref=>"dekedaha-fc"},
    ##      ...
    teams.each do |h|
      team_count = h[:count]
      team_name  = norm_team( h[:name] )   ## note: norm team name!!!
      team_ref   = h[:ref]

      ###
      ## quick fix for broken refs/links
      ##   olympique-lyon => olympique-lyonnais
      # team_ref = 'olympique-lyonnais' if team_ref == 'olympique-lyon'

      ## note: skip N.N. (place holder team)
      ##   team_ref is nil etc.
      next if team_name == 'N.N.'

      ## accumulate match count and all (normalized) names seen per team ref
      team_stat = teams_by_ref[ team_ref ] ||= { count: 0, names: [] }
      team_stat[:count] += team_count
      team_stat[:names] << team_name unless team_stat[:names].include?( team_name )
    end

    stage_recs = build( rows, season: season, league: league.key, stage: stage )

    pp stage_recs[0]   ## check first record
    recs += stage_recs
  end

  clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
                'uefa.cl.q', 'uefa.el.q', 'uefa.conf.q',
                'copa.l', 'concacaf.cl', 'caf.cl',
                'afl',
               ].include?(league.key) ? true : false

  ####
  #  auto-add (fifa) country code if int'l club tournament
  if clubs_intl
    ##
    ## get country codes for team ref
    teams_by_ref.each do |team_slug, h|
      Metal.download_team( team_slug, cache: true )
      team_page = Page::Team.from_cache( team_slug )
      props = team_page.props
      pp props
      country_name = props[:country]
      cty = Fifa.world.find_by_name( country_name )
      if cty.nil?
        puts "!! ERROR - no country found for #{country_name}"
        exit 1
      end
      h[:code] = cty.code
    end

    ## generate lookup by name
    teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
      ### todo/fix
      ##   report warning if names size is > 1!!!!
      ##
      rec[:names].each do |name|
        h[ name ] = rec
      end
      h
    end

    #####
    ## dump team refs
    puts " #{teams_by_ref.size} team(s) by ref:"
    pp teams_by_ref

    ## quick hack
    ##   add country (fifa) codes to team names
    ##   (record index 5 and 8 hold team 1 and team 2)
    ## NOTE(review): teams_by_name[...] is not checked for nil - a team name
    ##   in recs that is missing from the lookup would raise here; confirm
    ##   the names produced by build always match the names collected above
    recs.each do |rec|
      team1_org = rec[5]
      if team1_org != 'N.N.'   ## note - skip place holder; keep as-is
        country_code = teams_by_name[team1_org][:code]
        rec[5] = "#{team1_org} (#{country_code})"
      end
      team2_org = rec[8]
      if team2_org != 'N.N.'   ## note - skip place holder; keep as-is
        country_code = teams_by_name[team2_org][:code]
        rec[8] = "#{team2_org} (#{country_code})"
      end
    end
  end

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  ### note - do NOT sort for now
  ##   keep "original" page order - why? why not?
  ## recs = recs.sort { |l,r| l[2] <=> r[2] }

  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each do |rec|
    if rec[2]
      if rec[2] =~ /^\d{4}-\d{1,2}-\d{1,2}$/
        rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
      else
        ## report unknown date format warning
        puts "WARN - unsupported date format (cannot parse?) >#{rec[2]}<"
      end
    end
  end

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  puts " writing to >#{out_path}< - #{recs.size} record(s)..."
  write_csv( out_path, recs, headers: headers )

  ## add to tmp too for debugging
  out_path2 = "#{config.convert.out_dir}/tmp/#{league.key}/#{season.to_path}.csv"
  puts " writing to >#{out_path2}< - #{recs.size} record(s)..."
  write_csv( out_path2, recs, headers: headers )
end
.convert_reports(league:, season:) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# File 'lib/worldfootball/convert_reports.rb', line 4

## Converts the cached match reports of one league season into a
## "<league>~goals.csv" datafile (one record per goal).
##
## league - league code
## season - anything accepted by Season(); cast on entry
##
## Record layout: [match_id, score, minute, extra, player, notes] where
## extra is '(og)' for own goals, '(pen)' for penalties, '' otherwise.
## Matches without a report ref are skipped with a warning.
##
## NOTE(review): this method calls find_league and league.pages (no bang)
##   and expects page entries that respond to [:slug], while convert uses
##   find_league! / league.pages! and iterates |slug, stage| pairs -
##   confirm this code path still matches the current league/page API.
def self.convert_reports( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)
  league = find_league( league )

  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng
  ##   and so on
  mods = MODS[ league.key.split('.')[0] ] || {}

  pages = league.pages( season: season )

  recs = []

  ## if single (simple) page setup - wrap in array
  pages = pages.is_a?(Array) ? pages : [pages]
  pages.each do |page_meta|   # note: use page_info for now (or page_rec or page_meta or such)
    page = Page::Schedule.from_cache( page_meta[:slug] )
    print " page title=>#{page.title}<..."
    print "\n"

    matches = page.matches

    puts "matches - #{matches.size} rows:"
    pp matches[0]

    puts "#{page.generated_in_days_ago} - #{page.generated}"

    matches.each_with_index do |match,i|
      report_ref = match[:report_ref]
      if report_ref.nil?
        puts "!! WARN: no match report ref found for match:"
        pp match
        next
      end

      puts "reading #{i+1}/#{matches.size} - #{report_ref}..."
      report = Page::Report.from_cache( report_ref )

      puts
      puts report.title
      puts report.generated

      rows = report.goals
      puts "goals - #{rows.size} records"
      ## pp rows

      if rows.size > 0
        ## add goals
        date = Date.strptime( match[:date], '%Y-%m-%d')

        team1 = match[:team1]
        team2 = match[:team2]

        ## clean team name (e.g. remove (old))
        ##   and asciify (e.g. ’ to ' )
        team1 = norm_team( team1 )
        team2 = norm_team( team2 )

        ## apply per-country team name overrides (if any)
        team1 = mods[ team1 ] if mods[ team1 ]
        team2 = mods[ team2 ] if mods[ team2 ]

        match_id = "#{team1} - #{team2} | #{date.strftime('%b %-d %Y')}"

        rows.each do |row|
          extra = if row[:owngoal]
                    '(og)'    ## or use OG or O.G.- why? why not?
                  elsif row[:penalty]
                    '(pen)'   ## or use P or PEN - why? why not?
                  else
                    ''
                  end

          rec = [match_id,
                 row[:score],
                 "#{row[:minute]}'",
                 extra,
                 row[:player],
                 row[:notes]]
          recs << rec
        end
      end
    end  # each match
  end  # each page

  ## pp recs

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}~goals.csv"

  headers = ['Match', 'Score', 'Minute', 'Extra', 'Player', 'Notes']

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
.debug=(value) ⇒ Object
add a global debug flag
41 |
# File 'lib/worldfootball.rb', line 41

## Sets the global debug flag to the given value.
def self.debug=(value)
  @debug = value
end
.debug? ⇒ Boolean
note: default is FALSE
42 |
# File 'lib/worldfootball.rb', line 42

## Reads the global debug flag.
## note: default is FALSE (an unset or falsy flag is stored as false)
def self.debug?
  @debug = false unless @debug
  @debug
end
.find_league!(league_code) ⇒ Object
(strict) lookup convenience helpers with error reporting
AND abort if no lookup found
155 156 157 158 159 160 161 162 163 164 |
# File 'lib/worldfootball/leagues.rb', line 155

## (strict) league lookup - returns the LEAGUES entry for the given code;
## if there is none, prints the known league keys and terminates the
## process via exit 1.
def self.find_league!( league_code )
  found = LEAGUES[ league_code ]
  return found unless found.nil?

  puts "!! ERROR - no config found for #{league_code}; leagues incl:"
  puts LEAGUES.keys.join( ', ' )
  puts " #{LEAGUES.size} leagues(s)"
  exit 1
end
.find_league_pages!(league:, season:) ⇒ Object
166 167 168 169 170 |
# File 'lib/worldfootball/leagues.rb', line 166

## (strict) convenience lookup - resolves the league via find_league! and
## returns the result of its pages!( season: ) call.
def self.find_league_pages!( league:, season: )
  find_league!( league ).pages!( season: season )
end
.generate(league:, season:, overwrite: true) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/worldfootball.rb', line 85

## Generates the .txt match schedule for one league season from the
## .csv datafile found in the convert out_dir.
##
## league    - league code; resolved via find_league!
## season    - anything accepted by Season(); cast on entry
## overwrite - if false and the output file already exists, nothing is written
##
## The text is written twice: to the regular output path (archive-style
## by decade for seasons before 2000) and to a tmp copy for debugging.
def self.generate( league:, season:, overwrite: true )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league!( league )
  league.pages!( season: season )   ## result is not used; the (strict) lookup call itself is kept

  if season >= Season( '2000' )
    out_path = "#{config.generate.out_dir}/#{season.to_path}/#{league.key}.txt"
  else
    ## use archive-style before 2000!!!
    decade   = season.start_year - (season.start_year%10)
    out_path = "#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league.key}.txt"
  end

  ## check if output exists already
  if !overwrite && File.exist?( out_path )
    ## skip generation
    puts " OK #{league.key} #{season} (do NOT overwrite)"
    return
  end

  ## get matches
  csv_path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
  puts " ---> reading matches in #{csv_path} ..."
  matches = SportDb::CsvMatchParser.read( csv_path )
  puts " #{matches.size} matches"

  ## build
  body = SportDb::TxtMatchWriter.build( matches )
  puts body

  ## note - use league key for league name for now!!
  text = String.new
  text << "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
  text << body

  puts " writing to >#{out_path}<..."
  write_text( out_path, text )

  ## add to tmp too for debugging
  tmp_path = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
  puts " writing to >#{tmp_path}<..."
  write_text( tmp_path, text )
end
.list_pages ⇒ Object
todo/check - rename to/use list_cached_pages
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/worldfootball/cache.rb', line 5

## Lists all cached schedule pages (*.html below
## <Webcache.root>/www.weltfussball.de/alle_spiele) together with the
## league and season reported by Worldfootball.find_page for each
## basename; pages without a match are printed with "??".
##
## Output goes to stdout only; the elapsed time is printed at the end.
def self.list_pages   ## todo/check - rename to/use list_cached_pages
  start_time = Time.now   ## todo: use Timer? t = Timer.start / stop / diff etc. - why? why not?

  # pages = Dir.glob( './dl/at*' )
  pages = Dir.glob( "#{Webcache.root}/www.weltfussball.de/alle_spiele/*.html" )
  puts " #{pages.size} page(s)"   #=> 576 pages
  puts

  pages.each do |path|
    basename = File.basename( path, File.extname( path ) )
    print "%-50s" % basename
    print " => "

    page = Worldfootball.find_page( basename )
    if page
      print " "
      print "%-12s" % page[:league]
      print "| %-10s" % page[:season]
      print "\n"
    else
      print "??"
      print "\n"
    end
  end

  puts " #{pages.size} page(s)"   #=> 576 pages
  puts

  diff_time = Time.now - start_time
  ## was labelled "convert_all:" (copy-and-paste left-over)
  puts "list_pages: done in #{diff_time} sec(s)"
end
.log(msg) ⇒ Object
append to log
44 45 46 47 48 49 |
# File 'lib/worldfootball.rb', line 44

## Appends msg plus a trailing newline to ./logs.txt (opened in
## append mode with utf-8 encoding).
def self.log( msg )
  ### append to log
  File.open( './logs.txt', 'a:utf-8' ) do |file|
    file.write( msg )
    file.write( "\n" )
  end
end
.map_round(round, league:, season:) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/worldfootball/stages.rb', line 27

## Maps a round name to its configured replacement.
##
## The lookup table is read once from config/rounds.csv (columns key,
## name1, name2) and memoized; it is keyed by league code, with '*'
## holding the generic (all leagues) mappings.
##
## round  - round name to look up
## league - league code; compared downcased
## season - accepted but not used by the lookup
##
## returns: the mapped name - league-specific first, then generic (*) -
##   or nil if there is no mapping (also if the table has no '*' section)
def self.map_round( round, league:, season: )
  @rounds ||= begin
    table = {}
    read_csv( "#{Worldfootball.root}/config/rounds.csv" ).each do |rec|
      (table[ rec['key'] ] ||= {})[ rec['name1'] ] = rec['name2']
    end
    table
  end

  ## pp @rounds
  league_code = league.to_s.downcase

  ## note: dig returns nil for a missing section instead of raising
  @rounds.dig( league_code, round ) || @rounds.dig( '*', round )   ## try generic (*) lookup last
end
.map_stage(stage, league:, season:) ⇒ Object
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/worldfootball/stages.rb', line 5

## Maps a stage name to its configured replacement.
##
## The lookup table is read once from config/stages.csv (columns key,
## name1, name2) and memoized; it is keyed by league code, with '*'
## holding the generic (all leagues) mappings.
##
## stage  - stage name to look up
## league - league code; compared downcased
## season - accepted but not used by the lookup
##
## returns: the mapped name - league-specific first, then generic (*) -
##   or nil if there is no mapping (also if the table has no '*' section)
def self.map_stage( stage, league:, season: )
  @stages ||= begin
    table = {}
    read_csv( "#{Worldfootball.root}/config/stages.csv" ).each do |rec|
      (table[ rec['key'] ] ||= {})[ rec['name1'] ] = rec['name2']
    end
    table
  end

  ## pp @stages
  league_code = league.to_s.downcase

  ## note: dig returns nil for a missing section instead of raising
  @stages.dig( league_code, stage ) || @stages.dig( '*', stage )   ## try generic (*) lookup last
end
.norm_team(team) ⇒ Object
“global” helpers
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/worldfootball/mods.rb', line 10

## Cleans / normalizes a team name and asciifies quote characters.
##
## team - raw team name (String)
##
## returns: the normalized name; the place holders 'Sieger HF 1' and
##   'Sieger HF 2' are returned as 'N.N.'
def self.norm_team( team )
  ## clean team name and asciify (e.g. ’->' )
  team = team.sub( '(old)', '' ).strip

  ## e.g. Hawke’s Bay United FC or
  ##      ASC Monts d`Or Chasselay or
  ##      VV Heerlen ´16 / EMM ´15 / Wormer SV´30 / Swift ´36 / etc.
  team = team.gsub( /[’´`]/, "'" )

  ## br
  ##   Criciúma - SC => Criciúma SC
  ##   Bahia - BA    => Bahia BA
  ## cz
  ##   Baník Most - Souš => Baník Most Souš
  ## replace inline dash ( - ) with single space
  team = team.gsub( /[ ]+[-][ ]+/, ' ' )

  ## todo:
  ##   replace (A) with II
  ##     Austria Wien (A) => Austria Wien II
  ##   others too? - move to mods instead of generic rule - why? why not?
  team = team.sub( /[ ]+\(A\)/, ' II' )

  ##
  ## remove () - used/reserved for country code for now - why? why not?
  ##   e.g. Lloyds FC (Sittingbourne) => Lloyds FC Sittingbourne
  ##        August 1st (Army Team)    => August 1st Army Team
  ## note: gsub - strip ALL parenthesized groups (not only the first one)
  ##   so that no parentheses are left over
  ## add warning - why? why not?
  team = team.gsub( /\(([^)]+?)\)/, '\1' )

  ##
  ## strip special case
  ##   MFK Frýdek-Místek, a.s. => MFK Frýdek-Místek
  team = team.sub( ', a.s.', '' )

  ################
  ## quick hack - norm(alize) all place holder names to N.N.
  team = 'N.N.' if ['Sieger HF 1', 'Sieger HF 2'].include?( team )

  team
end
.parse_score(score_str) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/worldfootball/build-parse_score.rb', line 16

## Splits a raw (german-style) score string into its parts.
##
## score_str - raw score text, e.g. '2-1 (1-0)', '5-4 (0-0, 1-1, 2-2) i.E.',
##             '2-1 (1-0, 1-1) n.V.', '3-0 Wert.', 'abgebr.', '---'
##
## returns: [ht, ft, et, pen, comments] - all strings; parts that do not
##   apply are ''. Status-only inputs set ft to '(*)' (or '') and put the
##   status ('cancelled', 'abandoned', 'postponed', 'w/o', 'awd.') into
##   comments.
##
## The branches below are tried top to bottom and the first match wins;
## several of the patterns are not anchored, so the order matters.
## An unsupported format prints an error and terminates the process
## via exit 1.
def self.parse_score( score_str )
  ## add support for
  ##   3-0 (0-0, 0-0) Wert.
  ##   3-0 (0-0, 0-0) awd.
  ## check for 0:3 Wert. - change Wert. to awd. (awarded)
  ## todo/fix - use "hardcoded" Wert\. in regex - why? why not?
  ## score_str = score_str.sub( /Wert\./i, 'awd.' )

  comments = String.new   ## check - rename to/use status or such - why? why not?

  ## split score
  ft  = ''
  ht  = ''
  et  = ''
  pen = ''

  ##
  ## [085] 2021-10-21 | 22:00 | Metropolitanos FC | LALA FC | Aufg.
  ## !! ERROR - unsupported score format >Aufg.< - sorry; maybe add a score error fix/patch
  ##   - handle with Aufg.

  if score_str == '---'   ## in the future (no score yet) - was -:-
    ft = ''
    ht = ''
  elsif score_str == 'n.gesp.' ||   ## cancelled (british) / canceled (us)
        score_str == 'ausg.' ||     ## todo/check: change to some other status ????
        score_str == 'annull.' ||   ## todo/check: change to some other status (see ie 2012) ????
        score_str == 'Aufg.'
    ft = '(*)'
    ht = ''
    comments = 'cancelled'
  elsif score_str == 'abgebr.'   ## abandoned -- waiting for replay?
    ft = '(*)'
    ht = ''
    comments = 'abandoned'
  elsif score_str == 'verl.'   ## postponed
    ft = ''
    ht = ''
    comments = 'postponed'
  elsif score_str == 'WO'   # walk over
    ## W.O. or w/o (originally two words: "walk over"),
    ft = '(*)'
    ht = ''
    comments = 'w/o'   ## use walkover - why? why not?
  # 5-4 (0-0, 1-1, 2-2) i.E.
  #   => penalties, then (half time, full time, extra time)
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
                        [ ]*
                      i\.E\.
                     /x
    pen = "#{$1}-#{$2}"
    ht  = "#{$3}-#{$4}"
    ft  = "#{$5}-#{$6}"
    et  = "#{$7}-#{$8}"
  # 3-2 (0-0, 1-1) i.E. - note: no extra time!!! only ht,ft!!!
  #   "popular" in southamerica & mexico
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
                        [ ]*
                      i\.E\.
                     /x
    pen = "#{$1}-#{$2}"
    ht  = "#{$3}-#{$4}"
    ft  = "#{$5}-#{$6}"
    et  = ''
  # 2-1 (1-0, 1-1) n.V
  #   => extra time, then (half time, full time)
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                        [ ]*
                      n\.V\.
                     /x
    et = "#{$1}-#{$2}"
    ht = "#{$3}-#{$4}"
    ft = "#{$5}-#{$6}"
  ### auto-patch fix drop last score
  ##   1-3 (0-1, 1-1, 0-2) n.V. => 1-3 (0-1, 1-1) n.V.
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                        [ ]*
                      n\.V\.
                     /x
    et = "#{$1}-#{$2}"
    ht = "#{$3}-#{$4}"
    ft = "#{$5}-#{$6}"
    puts "!! WARN - auto-fix/patch score - >#{score_str}<"
    ### todo/fix - log auto-patch/fix - for double checking!!!!!
  ## full time with half time in parentheses e.g. 2-1 (1-0)
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                     /x
    ft = "#{$1}-#{$2}"
    ht = "#{$3}-#{$4}"
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      Wert\.   # ([a-z.]+)
                     /x
    ### assume awd. (awarded) always - why? why not?
    ft = "#{$1}-#{$2} (*)"
    ht = ''
    comments = 'awd.'   # awarded - $3
  ## auto-fix/patch
  ##   drop last scores (only use ft)
  ##   3-0 (0-0, 0-0) awd.
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)
                        [ ]*
                      Wert\.   # ([a-z.]+)
                     /x
    ### assume awd. (awarded) always - why? why not?
    ft = "#{$1}-#{$2} (*)"
    ht = ''
    comments = 'awd.'   # awarded - $7
    ## (auto) log case for double checking - why? why not?
  elsif score_str =~ /^([0-9]+)-([0-9]+)$/
    ft = "#{$1}-#{$2}"   ## e.g. see luxemburg and others
    ht = ''
  ## auto-fix/patch
  #   3-3 (0-3, 3-3) => 3-3 (0-3) - drop last score
  elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*,[ ]*
                      ([0-9]+) [ ]*-[ ]* ([0-9]+)
                      \)$
                     /x
    ft = "#{$1}-#{$2}"
    ht = "#{$3}-#{$4}"
    puts "!! WARN - auto-fix/patch score - >#{score_str}<"
    ### todo/fix - log auto-patch/fix - for double checking!!!!!
  elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      n\.V\.
                      $/x
    et = "#{$1}-#{$2}"
    ht = ''
    ft = ''
    puts "!! WARN - weird score n.V. only - >#{score_str}<"
  elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
                        [ ]*
                      (?: i\.E\. | n\.P\. )
                      $/x
    pen = "#{$1}-#{$2}"
    et = ''
    ht = ''
    ft = ''
    puts "!! WARN - weird score i.E. (n.P.) only - >#{score_str}<"
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
.reports(league:, season:, cache: true) ⇒ Object
todo/check: rename to reports_for_schedule or such - why? why not?
21 22 23 24 25 26 27 28 |
# File 'lib/worldfootball/download.rb', line 21

## Downloads the match reports for every schedule page of a league season.
##
## league - league code; resolved via find_league_pages!
## season - anything accepted by Season(); cast on entry
## cache  - passed through to Metal.download_reports_for_schedule
def self.reports( league:, season:, cache: true )   ## todo/check: rename to reports_for_schedule or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  find_league_pages!( league: league, season: season ).each do |slug, _stage|
    Metal.download_reports_for_schedule( slug, cache: cache )
  end  # each page
end
.root ⇒ Object
16 17 18 |
# File 'lib/worldfootball/version.rb', line 16

## Returns the absolute path of the directory three levels up from this
## source file (dirname applied three times to __FILE__, then expanded).
def self.root
  File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
end
.schedule(league:, season:, overwrite: true) ⇒ Object
porcelain “api”
7 8 9 10 11 12 13 14 15 16 17 18 |
# File 'lib/worldfootball/download.rb', line 7

## porcelain "api" - downloads every schedule page of a league season.
##
## league    - league code; resolved via find_league_pages!
## season    - anything accepted by Season(); cast on entry
## overwrite - if false, pages already in the web cache are skipped
def self.schedule( league:, season:, overwrite: true )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  page_list = find_league_pages!( league: league, season: season )
  page_list.each do |slug, _stage|
    ## note: the cache is only consulted if overwrite is off
    if overwrite || !Webcache.cached?( Metal.schedule_url( slug ))
      Metal.download_schedule( slug )
    else
      puts " OK #{league} #{season} - #{slug} (do NOT overwrite)"
    end
  end  # each page
end
.vacuum(rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/worldfootball/vacuum.rb', line 26

## Checks for unused (all-empty) columns and strips them.
##
## rows          - array of records (arrays); a cell counts as empty if it
##                 is nil or responds true to empty?
## headers       - column names, one per record position
## fixed_headers - column names that are always kept, even if all empty
##
## returns: [rows, headers] - the records without the dropped columns and
##   the names of the columns that were kept
##
## note: the per-column counter is dumped via pp (debug output)
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## note: use the headers passed in (was hard-coded to MAX_HEADERS)
  all_headers = headers

  ## count non-empty cells per column
  counter = Array.new( all_headers.size, 0 )
  rows.each do |row|
    row.each_with_index do |col, idx|
      counter[idx] += 1 unless col.nil? || col.empty?
    end
  end

  pp counter

  ## split columns into kept and empty
  kept_headers  = []
  empty_indices = []
  counter.each_with_index do |num, idx|
    header = all_headers[ idx ]
    if num > 0 || fixed_headers.include?( header )
      kept_headers << header
    else
      empty_indices << idx
    end
  end

  unless empty_indices.empty?
    rows = rows.map do |row|
      row.reject.with_index { |_col, idx| empty_indices.include?( idx ) }
    end
  end

  [rows, kept_headers]
end
.version ⇒ Object
8 9 10 |
# File 'lib/worldfootball/version.rb', line 8

## Returns the library version (the VERSION constant).
def self.version
  VERSION
end