Class: SportDb::Import::CountryIndex

Inherits:
Object
  • Object
show all
Includes:
NameHelper
Defined in:
lib/sportdb/formats/country/country_index.rb

Overview

built-in countries for (quick starter) auto-add

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(recs) ⇒ CountryIndex

Returns a new instance of CountryIndex.



11
12
13
14
15
16
17
# File 'lib/sportdb/formats/country/country_index.rb', line 11

def initialize( recs )
  @countries         = []
  @countries_by_code = {}
  @countries_by_name = {}

  add( recs )
end

Instance Attribute Details

#countriesObject (readonly)

all country records



9
10
11
# File 'lib/sportdb/formats/country/country_index.rb', line 9

def countries
  @countries
end

Instance Method Details

#add(recs) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/sportdb/formats/country/country_index.rb', line 28

def add( recs )
  ###########################################
  ## auto-fill countries
  ## pp recs
  recs.each do |rec|
    ## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}

    @countries << rec

    ## add codes lookups - key, code, ...
    if @countries_by_code[ rec.key ]
      puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
      exit 1
    else
      @countries_by_code[ rec.key ] = rec
    end

    ## add  code (only) if different from key
    if rec.key != rec.code.downcase
      if @countries_by_code[ rec.code.downcase ]
        puts "** !! ERROR !! country code  >#{rec.code.downcase}< already exits!!"
        exit 1
      else
        @countries_by_code[ rec.code.downcase ] = rec
      end
    end


    ##  add all names (canonical name + alt names
    names = [rec.name] + rec.alt_names
    more_names = []
    ## check "hand-typed" names for year (auto-add)
    ## check for year(s) e.g. (1887-1911), (-2013),
    ##                        (1946-2001,2013-) etc.
    names.each do |name|
      if has_year?( name )
        more_names <<  strip_year( name )
      end
    end

    names += more_names
    ## check for duplicates - simple check for now - fix/improve
    ## todo/fix: (auto)remove duplicates - why? why not?
    count      = names.size
    count_uniq = names.uniq.size
    if count != count_uniq
      puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
      pp names
      pp rec
      exit 1
    end

    names.each_with_index do |name,i|
      ## check lang codes e.g. [en], [fr], etc.
      ##  todo/check/fix:  move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
      name = strip_lang( name )
      norm = normalize( name )
      old_rec = @countries_by_name[ norm ]
      if old_rec
        ## check if country name already is included or is new country rec
          msg = "** !!! ERROR !!! - name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
          puts msg
          exit 1
      else
        @countries_by_name[ norm ] = rec
      end
    end

  end  ## each record
end

#find(key) ⇒ Object Also known as: []



112
113
114
115
116
# File 'lib/sportdb/formats/country/country_index.rb', line 112

def find( key )
  country = find_by_code( key )
  country = find_by_name( key )  if country.nil?     ## try lookup / find by (normalized) name
  country
end

#find_by_code(code) ⇒ Object

fix/todo: add find_by (alias for find_by_name/find_by_code)



102
103
104
105
# File 'lib/sportdb/formats/country/country_index.rb', line 102

def find_by_code( code )
  code = code.to_s.downcase   ## allow symbols (and always downcase e.g. AUT to aut etc.)
  @countries_by_code[ code ]
end

#find_by_name(name) ⇒ Object



107
108
109
110
# File 'lib/sportdb/formats/country/country_index.rb', line 107

def find_by_name( name )
  name = normalize( name.to_s )  ## allow symbols too (e.g. use to.s first)
  @countries_by_name[ name ]
end

#parse(line) ⇒ Object

split/parse country line

split on bullet e.g.
 split into name and code with regex - make code optional

Examples:
  Österreich • Austria (at)
  Österreich • Austria
  Austria
  Deutschland (de) • Germany

 todo/check: support more formats - why? why not?
     e.g.  Austria, AUT  (e.g. with comma - why? why not?)


134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/sportdb/formats/country/country_index.rb', line 134

def parse( line )
  values = line.split( '' )   ## use/support multi-lingual separator
  country = nil
  values.each do |value|
     value = value.strip
     ## check for trailing country code e.g. (at), (eng), etc.
     if value =~ /[ ]+\((?<code>[a-z]{1,4})\)$/  ## e.g. Austria (at)
       code =  $~[:code]
       name = value[0...(value.size-code.size-2)].strip  ## note: add -2 for brackets
       candidates = [ find_by_code( code ), find_by_name( name ) ]
       if candidates[0].nil?
         puts "** !!! ERROR !!! country - unknown code >#{code}< in line: #{line}"
         pp line
         exit 1
       end
       if candidates[1].nil?
         puts "** !!! ERROR !!! country - unknown name >#{code}< in line: #{line}"
         pp line
         exit 1
       end
       if candidates[0] != candidates[1]
         puts "** !!! ERROR !!! country - name and code do NOT match the same country:"
         pp line
         pp candidates
         exit 1
       end
       if country && country != candidates[0]
         puts "** !!! ERROR !!! country - names do NOT match the same country:"
         pp line
         pp country
         pp candidates
         exit 1
       end
       country = candidates[0]
     else
       ## just assume value is name or code
       candidate = find( value )
       if candidate.nil?
         puts "** !!! ERROR !!! country - unknown name or code >#{value}< in line: #{line}"
         pp line
         exit 1
       end
       if country && country != candidate
         puts "** !!! ERROR !!! country - names do NOT match the same country:"
         pp line
         pp country
         pp candidate
         exit 1
       end
       country = candidate
     end
  end
  country
end