Class: SportDb::Import::CountryIndex
- Inherits:
-
Object
- Object
- SportDb::Import::CountryIndex
- Includes:
- NameHelper
- Defined in:
- lib/sportdb/formats/country/country_index.rb
Overview
built-in countries for (quick starter) auto-add
Instance Attribute Summary collapse
-
#countries ⇒ Object
readonly
all country records.
Instance Method Summary collapse
- #add(recs) ⇒ Object
- #find(key) ⇒ Object (also: #[])
-
#find_by_code(code) ⇒ Object
fix/todo: add find_by (alias for find_by_name/find_by_code).
- #find_by_name(name) ⇒ Object
-
#initialize(recs) ⇒ CountryIndex
constructor
A new instance of CountryIndex.
-
#parse(line) ⇒ Object
split/parse country line.
Constructor Details
#initialize(recs) ⇒ CountryIndex
Returns a new instance of CountryIndex.
11 12 13 14 15 16 17 |
# File 'lib/sportdb/formats/country/country_index.rb', line 11

## Sets up an empty index - the list of all records plus the
## by-code and by-name lookup tables - and then registers
## the passed-in records via add.
def initialize( recs )
  @countries_by_name = {}
  @countries_by_code = {}
  @countries         = []

  add( recs )
end
Instance Attribute Details
#countries ⇒ Object (readonly)
all country records
9 10 11 |
# File 'lib/sportdb/formats/country/country_index.rb', line 9

## Reader for all country records held by the index
## (returns the @countries array itself, not a copy).
def countries
  @countries
end
Instance Method Details
#add(recs) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/sportdb/formats/country/country_index.rb', line 28

## Registers (auto-fills) country records in the index.
##
## recs - enumerable of records responding to key, code, name and alt_names,
##        e.g. { key:'af', code:'AFG', name:'Afghanistan' }
##
## For every record:
##   - appends the record to @countries
##   - maps the key - and the downcased code if different from the key -
##     to the record in @countries_by_code
##   - maps every normalized name (canonical name, alt names and
##     year-stripped variants) to the record in @countries_by_name
##
## Prints an error and terminates via exit 1 on a duplicate key/code,
## on duplicate names within one record or if a (normalized) name is
## already taken.
##
## note: has_year?, strip_year, strip_lang and normalize are not defined
##       here - presumably provided by the included NameHelper.
##
## Returns recs (the result of recs.each).
def add( recs )
  recs.each do |rec|
    @countries << rec

    ## add code lookups - key first
    if @countries_by_code[ rec.key ]
      puts "** !! ERROR !! country code (key) >#{rec.key}< already exists!!"
      exit 1
    end
    @countries_by_code[ rec.key ] = rec

    ## add code (only) if different from key
    code = rec.code.downcase
    if code != rec.key
      if @countries_by_code[ code ]
        puts "** !! ERROR !! country code >#{code}< already exists!!"
        exit 1
      end
      @countries_by_code[ code ] = rec
    end

    ## collect all names (canonical name + alt names)
    names = [rec.name] + rec.alt_names

    ## check "hand-typed" names for year(s) e.g. (1887-1911), (-2013),
    ##   (1946-2001,2013-) etc. and auto-add the variant without year(s)
    names += names.select { |name| has_year?( name ) }
                  .map    { |name| strip_year( name ) }

    ## check for duplicates - simple check for now - fix/improve
    ## todo/fix: (auto)remove duplicates - why? why not?
    duplicates = names.size - names.uniq.size
    if duplicates != 0
      puts "** !!! ERROR !!! - #{duplicates} duplicate name(s):"
      pp names
      pp rec
      exit 1
    end

    names.each do |name|
      ## strip lang codes e.g. [en], [fr], etc.
      ## todo/check/fix: move strip_lang up in the chain - check for duplicates
      ##   (e.g. only lang code marker different etc.) - why? why not?
      name = strip_lang( name )
      norm = normalize( name )

      ## note: any existing entry counts as a conflict -
      ##   even if it points to the very same record
      old_rec = @countries_by_name[ norm ]
      if old_rec
        puts "** !!! ERROR !!! - name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
        exit 1
      end
      @countries_by_name[ norm ] = rec
    end
  end ## each record
end
#find(key) ⇒ Object Also known as: []
112 113 114 115 116 |
# File 'lib/sportdb/formats/country/country_index.rb', line 112

## Looks up a country by code first and - if there is no match -
## falls back to a lookup by (normalized) name.
## Returns the matching record or nil.
def find( key )
  find_by_code( key ) || find_by_name( key )
end
#find_by_code(code) ⇒ Object
fix/todo: add find_by (alias for find_by_name/find_by_code)
102 103 104 105 |
# File 'lib/sportdb/formats/country/country_index.rb', line 102

## Looks up a country by key or code.
## The argument gets converted with to_s (allows symbols) and is
## always downcased (e.g. AUT to aut) before the lookup.
## Returns the matching record or nil.
def find_by_code( code )
  @countries_by_code[ code.to_s.downcase ]
end
#find_by_name(name) ⇒ Object
107 108 109 110 |
# File 'lib/sportdb/formats/country/country_index.rb', line 107

## Looks up a country by name.
## The argument gets converted with to_s (allows symbols too) and
## passed through normalize before the lookup.
## Returns the matching record or nil.
def find_by_name( name )
  @countries_by_name[ normalize( name.to_s ) ]
end
#parse(line) ⇒ Object
split/parse country line
split on the bullet character (•) first, then
split into name and code with regex - make code optional
Examples:
Österreich • Austria (at)
Österreich • Austria
Austria
Deutschland (de) • Germany
todo/check: support more formats - why? why not?
e.g. Austria, AUT (e.g. with comma - why? why not?)
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/sportdb/formats/country/country_index.rb', line 134

## Splits/parses a country line and returns the matching country record.
##
## The line gets split on the bullet (•) - the multi-lingual separator;
## every (stripped) value is either
##   - a name with a trailing code in brackets e.g. Austria (at) -
##     name and code get looked up separately and must match the
##     same country - or
##   - a plain name or code - looked up via find.
## All values of a line must resolve to the same country.
##
## Prints an error and terminates via exit 1 if a value is unknown
## or the values do not match the same country.
##
## Returns the country record (or nil if the line has no values).
def parse( line )
  country = nil

  line.split( '•' ).each do |value|   ## use/support multi-lingual separator
    value = value.strip

    ## check for trailing country code e.g. (at), (eng), etc.
    ##   note: \z anchors at the very end of the value (not just end of a line)
    m = /[ ]+\((?<code>[a-z]{1,4})\)\z/.match( value )
    if m    ## e.g. Austria (at)
      code = m[:code]
      name = m.pre_match.strip    ## everything before the (spaces and) code

      candidates = [ find_by_code( code ), find_by_name( name ) ]

      if candidates[0].nil?
        puts "** !!! ERROR !!! country - unknown code >#{code}< in line: #{line}"
        pp line
        exit 1
      end

      if candidates[1].nil?
        ## note: report the unknown name (not the code)
        puts "** !!! ERROR !!! country - unknown name >#{name}< in line: #{line}"
        pp line
        exit 1
      end

      if candidates[0] != candidates[1]
        puts "** !!! ERROR !!! country - name and code do NOT match the same country:"
        pp line
        pp candidates
        exit 1
      end

      if country && country != candidates[0]
        puts "** !!! ERROR !!! country - names do NOT match the same country:"
        pp line
        pp country
        pp candidates
        exit 1
      end

      country = candidates[0]
    else
      ## just assume value is name or code
      candidate = find( value )

      if candidate.nil?
        puts "** !!! ERROR !!! country - unknown name or code >#{value}< in line: #{line}"
        pp line
        exit 1
      end

      if country && country != candidate
        puts "** !!! ERROR !!! country - names do NOT match the same country:"
        pp line
        pp country
        pp candidate
        exit 1
      end

      country = candidate
    end
  end

  country
end