Class: SportDb::Import::LeagueReader
- Inherits:
-
Object
- Object
- SportDb::Import::LeagueReader
- Includes:
- LogUtils::Logging
- Defined in:
- lib/sportdb/helpers/league_reader.rb
Class Method Summary collapse
- .parse(txt) ⇒ Object
-
.read(path) ⇒ Object
Reads the file at path as UTF-8 text and parses it. Open question: rename to read_file or from_file, etc.?
Instance Method Summary collapse
-
#_norm(str) ⇒ Object
Normalization helper: removes dollar signs ($$$), squishes runs of spaces into a single space, and strips leading and trailing spaces.
- #_squish(str) ⇒ Object
-
#initialize(txt) ⇒ LeagueReader
constructor
A new instance of LeagueReader.
- #parse ⇒ Object
Constructor Details
#initialize(txt) ⇒ LeagueReader
Returns a new instance of LeagueReader.
22 23 24 |
# File 'lib/sportdb/helpers/league_reader.rb', line 22
##
## Create a reader for the given text.
##
## txt - the text to parse later; it is only stored here (in @txt),
##       the actual work happens in #parse.
def initialize(txt)
  @txt = txt
end
Class Method Details
.parse(txt) ⇒ Object
14 15 16 |
# File 'lib/sportdb/helpers/league_reader.rb', line 14
##
## Convenience shortcut: build a reader for txt and return the result
## of its instance-level #parse.
def self.parse(txt)
  reader = new(txt)
  reader.parse
end
.read(path) ⇒ Object
Reads the file at path as UTF-8 text and parses it. Open question: rename to read_file or from_file, etc. - why? why not?
9 10 11 12 |
# File 'lib/sportdb/helpers/league_reader.rb', line 9
##
## Read the file at path as UTF-8 text and hand the text to .parse.
##
## path - location of the file to read
##
## Returns whatever .parse returns for the file's text.
## Errors raised by File.open (e.g. for a missing file) are not rescued.
def self.read(path)   ## use - rename to read_file or from_file etc. - why? why not?
  ## note: BOM|UTF-8 drops a leading byte-order mark (if present);
  ##   otherwise the mark would end up as the first character of the text
  txt = File.open(path, 'r:BOM|UTF-8', &:read)
  parse(txt)
end
Instance Method Details
#_norm(str) ⇒ Object
Normalization helper:
removes dollar signs ($$$), squishes runs of spaces into a single space,
and strips leading and trailing spaces.
137 138 139 140 |
# File 'lib/sportdb/helpers/league_reader.rb', line 137
##
## Normalize a name: remove all dollar signs ($$$), then squish
## whitespace and strip leading/trailing spaces (via _squish).
##
## str - the string to clean up (not modified)
##
## Returns a new, cleaned-up string.
def _norm(str)
  ## only extra clean-up of dollars for now ($$$)
  _squish(str.delete('$'))
end
#_squish(str) ⇒ Object
142 143 144 |
# File 'lib/sportdb/helpers/league_reader.rb', line 142
##
## Collapse every run of spaces, tabs and non-breaking spaces (U+00A0)
## into one plain space, then strip the ends.
##
## Returns a new string; str itself is left untouched.
def _squish(str)
  collapsed = str.gsub(/[ \t\u00a0]+/, ' ')
  collapsed.strip
end
#parse ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/sportdb/helpers/league_reader.rb', line 26
##
## Parse the text passed to the constructor (@txt) into league records.
##
## The text is split into nodes by OutlineReader.parse; each node is
## either a heading ([:h1, text]) or a paragraph ([:p, lines]):
##
## - a level-1 heading switches the "section":
##     national team(s)       => intl: true,  clubs: false, no country
##     international / int'l  => intl: true,  clubs: true,  no country
##     anything else          => looked up via Country.parse_heading
## - a paragraph line starting with a pipe (|) adds alternative names
##   to the league of the most recent "regular" line
## - any other paragraph line must be "<key> <name>"; the key gets
##   prefixed with the country key if the section has a country
##
## Returns an array with one League record per "regular" line.
##
## note: all input errors (unsupported heading level, unknown country,
##   alternative names without a league, missing key, unknown node type)
##   print a message and terminate via exit 1 (SystemExit).
def parse
  recs     = []
  last_rec = nil      # most recent league; target for alternative names

  country = nil       # last country
  intl    = false     # is international (league/tournament/cup/competition)
  clubs   = true      # or clubs|national teams

  OutlineReader.parse( @txt ).each do |node|
    type = node[0]

    case type
    when :h1, :h2, :h3, :h4, :h5, :h6
      heading_level = type[1].to_i
      heading       = node[1]

      logger.debug "heading #{heading_level} >#{heading}<"

      if heading_level != 1
        puts "** !!! ERROR !!! unsupported heading level; expected heading 1 for now only; sorry"
        pp node      ## note: was pp line (undefined here - raised a NameError)
        exit 1
      end

      logger.debug "heading (#{heading_level}) >#{heading}<"

      ## map to country or international / int'l or national teams
      if heading =~ /national team/i   ## national team tournament
        country = nil
        intl    = true
        clubs   = false
      elsif heading =~ /international|int'l/i   ## int'l club tournament
        country = nil
        intl    = true
        clubs   = true
      else
        ## assume country in heading; allow all "formats" supported by parse e.g.
        ##   Österreich • Austria (at)
        ##   Österreich • Austria
        ##   Austria
        ##   Deutschland (de) • Germany
        country = Country.parse_heading( heading )
        intl    = false
        clubs   = true

        ## check country code - MUST exist for now!!!!
        if country.nil?
          puts "!!! error [league reader] - unknown country >#{heading}< - sorry - add country to config to fix"
          exit 1
        end
      end
    when :p   ## paragraph with (text) lines
      node[1].each do |line|
        if line.start_with?( '|' )
          ## assume continuation with line of alternative names
          if last_rec.nil?
            ## no league to attach the names to (would crash on nil otherwise)
            puts "** !!! ERROR !!! [league reader] - alternative names line without preceding league:"
            pp line
            exit 1
          end

          ## note: skip leading pipe; names separated by pipe (|)
          values = line[1..-1].split( '|' ).map { |value| _norm( value ) }

          logger.debug "alt_names: #{values.join( '|' )}"

          last_rec.alt_names += values
        else
          ## assume "regular" line
          ## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
          m = /^([a-z0-9][a-z0-9.]*)[ ]+(.+)$/.match( line )
          if m.nil?
            puts "** !!! ERROR !!! missing key for (canonical) league name"
            pp line
            exit 1
          end

          league_key  = m[1]
          ## 1) strip (commercial) sponsor markers/tags e.g $$
          ## 2) strip and squish (white)spaces
          league_name = _norm( m[2] )

          logger.debug "key: >#{league_key}<, name: >#{league_name}<"

          ## prepend country key/code if country present
          ##   todo/fix: only auto-prepend country if key/code start with a number (level) or incl. cup
          ##     why? lets you "overwrite" key if desired - use it - why? why not?
          league_key = "#{country.key}.#{league_key}" if country

          rec = League.new( key:     league_key,
                            name:    league_name,
                            country: country,
                            intl:    intl,
                            clubs:   clubs )
          recs << rec
          last_rec = rec
        end
      end # each line
    else
      puts "** !!! ERROR !!! [league reader] - unknown line type:"
      pp node
      exit 1
    end
  end

  recs
end