Class: SportDb::Import::WikiReader

Inherits:
Object
  • Object
show all
Defined in:
lib/sportdb/formats/team/wiki_reader.rb

Overview

todo/check: rename to WikiClubReader - why? why not?

Defined Under Namespace

Classes: WikiClub

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ WikiReader

Returns a new instance of WikiReader.



30
31
32
# File 'lib/sportdb/formats/team/wiki_reader.rb', line 30

## Keeps a reference to the text to read; no parsing happens here.
##
## txt - the wiki-style club listing; #parse later walks it with each_line
def initialize(txt)
  @txt = txt
end

Class Method Details

.parse(txt) ⇒ Object



26
27
28
# File 'lib/sportdb/formats/team/wiki_reader.rb', line 26

## Convenience shortcut: builds a reader for txt and returns
## the result of its #parse call.
def self.parse(txt)
  reader = new(txt)
  reader.parse
end

.read(path) ⇒ Object

use - rename to read_file or from_file etc. - why? why not?



21
22
23
24
# File 'lib/sportdb/formats/team/wiki_reader.rb', line 21

## Reads the file at path as UTF-8 text and hands the content on to parse.
##
## use - rename to read_file or from_file etc. - why? why not?
def self.read(path)
  parse(File.read(path, mode: 'r:utf-8'))
end

Instance Method Details

#catalog ⇒ Object



18
# File 'lib/sportdb/formats/team/wiki_reader.rb', line 18

## Returns whatever Import.catalog returns; #parse uses it to look up countries.
def catalog
  Import.catalog
end

#parse ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/sportdb/formats/team/wiki_reader.rb', line 34

## Parses the text passed to the constructor into a flat list of WikiClub records.
##
## Line rules (applied to every line after stripping surrounding whitespace):
##   - empty lines and lines starting with # are skipped
##   - an inline # comment is cut off up to the end of the line
##       e.g. Eupen        => KAS Eupen,    ## [de]
##         => Eupen        => KAS Eupen,
##   - lines made up of = only (e.g. ========) are decorative and skipped
##   - = Text = (trailing = optional) is a heading; only level 1 is supported
##     and its text must resolve to a country via catalog.countries.parse
##   - every other line is a club name; runs of spaces and tabs collapse to one
##     space and underscores become spaces
##       e.g. Cercle_Brugge_K.S.V.  =>  Cercle Brugge K.S.V.
##
## On a heading deeper than level 1, an unknown country, or a club name that
## appears before any country heading, an error message is printed and the
## process is terminated with exit 1 (raises SystemExit).
##
## Returns an array of WikiClub records (club name + country of the last heading)
## in input order. Nothing is printed for input that parses without errors.
def parse
  ## note: like in wikimedia markup (and markdown) all trailing ==== are optional
  ##  todo/check:  allow ===  Text  =-=-=-=-=-=   too - why? why not?
  heading_pattern = /^(={1,})    ## leading ======  (count gives the heading level)
                     ([^=]+?)    ## text  (note: for now no "inline" = allowed)
                     =*          ## (optional) trailing ====
                     $/x

  recs = []
  last_country = nil   ## note: supports only one level of headings for now (and that is a country)

  @txt.each_line do |line|
    line = line.strip

    next if line.empty?
    next if line.start_with?('#')   ## skip comments too

    ## strip inline (until end-of-line) comments too
    line = line.sub(/#.*/, '').strip

    next if line =~ /^=+$/          ## skip "decorative" only heading e.g. ========

    m = heading_pattern.match(line)
    if m
      heading_level = m[1].length   ## count number of = for heading level
      heading       = m[2].strip

      if heading_level > 1
        puts "** !!! ERROR [wiki reader] !!! -  - headings level too deep - only top / one level supported for now; sorry"
        exit 1
      end

      ## assume country in heading; allow all "formats" supported by parse e.g.
      ##   Österreich • Austria (at)
      ##   Österreich • Austria
      ##   Austria
      ##   Deutschland (de) • Germany
      country = catalog.countries.parse(heading)
      ## check country code - MUST exist for now!!!!
      if country.nil?
        puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
        exit 1
      end

      last_country = country
    else
      if last_country.nil?
        puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
        exit 1
      end

      ## squish (white)spaces (line is already stripped at this point)
      ##   e.g. New York FC      (2011-)  => New York FC (2011-)
      ## and normalize underscores (_) - replace with space
      value = line.gsub(/[ \t]+/, ' ').tr('_', ' ')

      recs << WikiClub.new(value, last_country)
    end
  end  # each_line

  recs
end