Class: SportDb::Import::WikiIndex

Inherits:
Object
  • Object
show all
Includes:
NameHelper
Defined in:
lib/sportdb/config/wiki_index.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(recs) ⇒ WikiIndex

Returns a new instance of WikiIndex.



41
42
43
44
45
46
47
48
49
50
# File 'lib/sportdb/config/wiki_index.rb', line 41

## Builds the lookup table from the passed-in wiki page records.
##   Every rec is filed under its country key (rec.country.key) and, within
##   that country, under normalize( strip_wiki( rec.name )).
def initialize( recs )
  ## two-level table: country key => { page name key => rec }
  @pages_by_country = {}

  ## todo/fix: check for duplicate recs - report and exit on duplicate!
  ##   note: for now a later rec replaces an earlier one stored under the same key
  recs.each do |rec|
    pages_for_country = (@pages_by_country[ rec.country.key ] ||= {})
    page_key          = normalize( strip_wiki( rec.name ))
    pages_for_country[ page_key ] = rec
  end
end

Class Method Details

.build(path) ⇒ Object



9
10
11
12
13
14
15
16
17
18
# File 'lib/sportdb/config/wiki_index.rb', line 9

## Reads all club wiki entries from the package at path, parses each one with
##   WikiReader.parse and builds a new index from the collected records.
##
##   path - handed to Package.new as-is
##
##   Returns the result of new( recs ).
def self.build( path )
  pack = Package.new( path )
  recs = []
  pack.each_clubs_wiki do |entry|
    ## note: append in place - recs += ... would copy the whole list again for every entry
    recs.concat( WikiReader.parse( entry.read ))
  end

  new( recs )
end

Instance Method Details

#find_by(club:) ⇒ Object

todo/check: use find_by_club - why? why not?



53
54
55
# File 'lib/sportdb/config/wiki_index.rb', line 53

## Keyword-argument front end for find_by_club: passes club through
##   unchanged and returns whatever find_by_club returns.
##   todo/check: use find_by_club - why? why not?
def find_by( club: )
  find_by_club( club )
end

#find_by_club(club) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/sportdb/config/wiki_index.rb', line 57

## Looks up the wiki page record for a club.
##   Tries the club name first, then every alternative name (in the given order),
##   within the pages of the club's country only.
##
##   Returns the first record found or nil if nothing matches
##   (or if there are no pages for the club's country).
def find_by_club( club )
  ## get query params from club
  candidates = [club.name] + club.alt_names
  pages      = @pages_by_country[ club.country.key ]
  return nil unless pages

  ## todo/check: sort names ?
  ##   sort by longest first (for best match)
  candidates.each do |candidate|
    ## note: normalize AND sanitize (e.g. remove/strip year and lang e.g. (1946-2001), [en] too)
    page = pages[ normalize( strip_year( strip_lang( candidate ))) ]
    return page if page   ## bingo!! found - stop on first match
  end

  nil   ## note: nothing found
end

#strip_wiki(name) ⇒ Object

fix/todo: this method is also used / duplicated in ClubIndex#add_wiki !!!


26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/sportdb/config/wiki_index.rb', line 26

## Strips the disambiguation qualifier from a wikipedia page name if present
##   and returns the result with leading and trailing whitespace removed.
##
##   note: only year and foot... qualifiers get removed for now
##     e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
##          Willem II (football club)  => Willem II
##
##   all others do NOT get stripped !! e.g.
##     América Futebol Clube (MG)
##   only add more "special" cases on demand (that is, if we find more)
##
##   todo/check: rename to strip_wikipedia_en - why? why not?
def strip_wiki( name )
  ## pass 1: (...) starting with a digit 1 or 2 (assuming year)
  without_year = name.gsub( /\([12][^\)]+?\)/, '' ).strip

  ## pass 2: (...) starting with foot (assuming football ...)
  without_year.gsub( /\(foot[^\)]+?\)/, '' ).strip
end