Class: Rsssf::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/rsssf/parser/token.rb,
lib/rsssf/parser/linter.rb,
lib/rsssf/parser/parser.rb,
lib/rsssf/parser/token-date.rb,
lib/rsssf/parser/token-note.rb,
lib/rsssf/parser/token-text.rb,
lib/rsssf/parser/token-goals.rb,
lib/rsssf/parser/token-round.rb,
lib/rsssf/parser/token-score.rb

Defined Under Namespace

Classes: Linter

Constant Summary collapse

BASICS_RE =
%r{
    (?<spaces> [ ]{2,}) |
    (?<space>  [ ]) 
        |
    (?<sym>[;,@|\[\]\(\)])     ## note - add () too  - why? why not?
}ix
VS_RE =
%r{   ## must be space before and after!!!
    (?<vs>
      (?<=[ ])	# Positive lookbehind for space
         -
       (?=[ ])   # positive lookahead for space 
    )
}ix
RE =
Regexp.union(  GROUP_RE, ROUND_RE, LEG_RE,
                    DATE_RE,
                    VS_RE,
                    SCORE_RE,
                    SCORE_AWD_RE, SCORE_ABD_RE, SCORE_PPD_RE, SCORE_NP_RE,
  SCORE_WO_RE,
                    SCORE_EXT_RE,
                    NOTE_RE,
                    BASICS_RE,
TEXT_RE )
HYPHEN_RE =

rename to dash or to ???

used to add/allow hyphen/dash (-) in INSIDE_RE
%r{   ## must be space before and after (or end of line)!!!
  ##  note - uses SYM capture 
    (?<sym>
      (?<=[ ])	# Positive lookbehind for space
         -
       (?=[ ]|$)   # positive lookahead for space 
    )
}ix
SCORE_AT_RE =

rename to ?? use SCORE_AT for now - why? why not?

 add support for score at/score points/markers                
e.g.  [1-0 Andrei 08, 1-1 Rydlewicz 24, 1-2 Prica 85, 2-2 Bella 88,
    2-3 Arvidsson 102]
%r{ (?<score_at> 
      \b
      \d{1,2}-\d{1,2}
      \b
    )  
}ix
INSIDE_RE =

“strict” text match mode inside brackets

]
Regexp.union(  SCORE_AT_RE,
   GOAL_OG_RE, GOAL_PEN_RE,
   BASICS_RE, HYPHEN_RE,
   TEXT_STRICT_RE,
   MINUTE_RE, 
)
SYM_CLOSE =

open/close pairs - lookup close (by open char)

{
  '(' => ')',
  '[' => ']',
}
MONTH_LINES =

note - support only 5 letter max for now

no January|February|August etc.
SportDb::Parser.parse_names( <<TXT )
Jan
Feb
March      Mar
April      Apr
May
June       Jun
July       Jul
Aug
Sept       Sep
Oct
Nov
Dec
TXT
MONTH_NAMES =
SportDb::Parser.build_names( MONTH_LINES )
MONTH_MAP =

pp MONTH_NAMES

SportDb::Parser.build_map( MONTH_LINES, downcase: true )
DAY_LINES =

note - only support two or three letters

no Tues | Thur | Thurs | Sunday etc.
SportDb::Parser.parse_names( <<TXT )
Mon  Mo
Tue  Tu
Wed  We
Thu  Th
Fri  Fr
Sat  Sa
Sun  Su
TXT
DAY_NAMES =
SportDb::Parser.build_names( DAY_LINES )
DAY_MAP =

pp DAY_NAMES

SportDb::Parser.build_map( DAY_LINES, downcase: true )
DATE_RE =

e.g. Fri Aug 9

%r{
 ## note - do not include [] in capture for now - why? why not
    ## eat-up/consume optional [] - part i
    (?: \[ | \b
     )
(?<date>

     (?:  ######  
          ## variant I/1/one
          ###   Fri June 24 

     ## optional day name
     ((?<day_name>#{DAY_NAMES})
          [ ]
     )?    
     ##  allow 1 or 2 spaces e.g. Jul  2 / Jun 27 to pretty print
     (?<month_name>#{MONTH_NAMES})
         [ ]{1,2}
     (?<day>\d{1,2})
     ## optional year
     (  [ ]
        (?<year>\d{4})
     )?   
     )
    |
     (?: #### 
         ## variant II/2/two
         ##   17- 3-22   - allow space befor mont
         ##   17-3-22
            \d{1,2}
             -
            [ ]*\d{1,2} 
             -
             (?:
                \d{4} |   ## 2024
                \d{2}     ## or 24 only
             )
     )
     )  ## end date capture
  ## eat-up/consume optional [] - part ii
  (?: \] | \b
  )        
}ix
NOTE_BASICS_RE =

move to token-note(s) file !!!!

%r{
    (?<note_open> \[ )
   (?<note>
     (?:  ##  starting with ___   PLUS requiring more text
       (?:
          nb:
          ##  e.g. [NB: between top-8 of regular season]
          #        [NB: América, Morelia and Tigres qualified on better record regular season]
          #        [NB: Celaya qualified on away goals]
          #        [NB: Alebrijes qualified on away goal]
          #        [NB: Leones Negros qualified on away goals]
          #
          # todo/fix:
          # add "top-level" NB: version
          ##   with full (end-of) line note - why? why not?
          |
          (?: originally[ ])? scheduled
          ## e.g. [originally scheduled to play in Mexico City] 
          |
          rescheduled
          ## e.g.  [Rescheduled due to earthquake occurred in Mexico on September 19]
          |
          remaining
          ## e.g. [remaining 79']   
          ##      [remaining 84'] 
          ##      [remaining 59']   
          ##      [remaining 5']
          |
          played  
          ## e.g. [played in Macaé-RJ]
          ##      [played in Caxias do Sul-RS]
          ##      [played in Sete Lagoas-MG]
          ##      [played in Uberlândia-MG]
          ##      [played in Brasília-DF]
          ##      [played in Vöcklabruck]
          ##      [played in Pasching]
          |
          declared
          ## e.g.  [declared void]
          |
          inter-group
          ## e.g. [inter-group A-B]
          ##      [inter-group C-D]
       )
      [ ]
      [^\]]+?    ## slurp all to next ] - (use non-greedy) 
     )
      |
     (?:
       ## starting with in  - do NOT allow digits
       ##   name starting with in possible - why? why not?
           in[ ]
            [^0-9\]]+?
       ## e.g. [In Estadio La Corregidora] 
       ##      [in Unidad Deportiva Centenario]
       ##      [in Estadio Olímpico Universitario]
       ##      [in Estadio Victoria]
       ##      [in UD José Brindis]
       ##      [in Colomos Alfredo "Pistache" Torres stadium]
     )
      |
      (?:
          (?:
             postponed    
             ## e.g. [postponed due to problems with the screen of the stadium]
             ##      [postponed by storm]
             ##      [postponed due to tropical storm "Hanna"]
             ##      [postponed from Sep 10-12 due to death Queen Elizabeth II]
             ##     [postponed]  -- include why? why not?
             |
             awarded
             ## e.g. [awarded match to Leones Negros by undue alignment; original result 1-2]
             ##     [awarded 3-0 to Cafetaleros by undue alignment; originally ended 2-0]
             ##     [awarded 3-0; originally 0-2, América used ineligible player (Federico Viñas)]
             |
             abandoned
             ## e.g. [abandoned at 1-1 in 65' due to cardiac arrest Luton player Tom Lockyer]
             ##      [abandoned at 0-0 in 6' due to waterlogged pitch]
             ##     [abandoned at 5-0 in 80' due to attack on assistant referee by Cerro; result stood]
             ##    [abandoned at 1-0 in 31']
             ##    [abandoned at 0-1' in 85 due to crowd trouble]
             |
              suspended
              ## e.g. [suspended at 0-0 in 12' due to storm]  
              ##      [suspended at 84' by storm; result stood]
              |
              annulled
              ## e.g.  [annulled]
              |
              replay
              ## e.g.  [replay]
              |
              verified
              ## e.g.  [verified 2:0 wo.]
          )
        ([ ]    ## note - optional text
          [^\]]+?
         )?         ## slurp all to next ] - (use non-greedy) 
      )
    )    # note capture  
        
     (?: 
         (?<note_close> \] )
         | $ ## note - allow open notes (that continue on next line) 
      )  
}ix
NOTE_MORE_RE =
%r{
      (?<=[ ])  ## one (leading) space min. required
       (?<note_cont>
             [⮑…] |
             \.{2,3}   ### .. or ...
       )
        [ ]*
       (?<note>
            [^\]]+?   ## non-greeedy
          )  
       (?: 
         (?<note_close> \] )
         | $ ## note - allow open notes (that continue on next line) 
       )  
}ix
NOTE_RE =
Regexp.union(  NOTE_BASICS_RE, 
 NOTE_MORE_RE, 
)
TEXT_QUOTED =

simple (double) quoted text

only supports a-z (unicode) PLUS (single) inline space
 add more chars - why? why not?
'(?:  "    ' +
'  \p{L}+  ' + 
'     (?: [ ]  ' +
'        \p{L}+ )*   '  + 
'    "  )  '
TEXT_STRICT_RE =
%r{
   (?<text>
         (?: \b |  #{TEXT_QUOTED} [ ]   ## note - leading quoted text must be followed by space!!
          )
          \p{L}+    ## all unicode letters (e.g. [a-z])
           
             (?:
               (?:[ ]
                    |     # only single spaces allowed inline!!!
                   [-]                                              
               )?
               (?:
                  \p{L}+ |
                   ['.] |
                   (?:
                      (?<= [ ])
                      #{TEXT_QUOTED}
                      (?= [ ]|$)   ### must be followed by space
                                  ##  todo/fix - add all end of text lookaheads to (see below)
                   )
               )  
              )*  
               ## must NOT end with space or dash(-)
              ##  todo/fix - possible in regex here
              ##     only end in alphanum a-z0-9 (not dot or & ???)
   
        ## positive lookahead
        ##   cannot use \b  if text ends in dot (.) or other non-alphnum 
        ##        than \b will not work
        ##   not    - add () too for now - why? why not? 
            (?=[ ,;@|\[\]\(\)]  
                 |$
            )  
    )
}ix
TEXT_RE =
%r{
    ## must start with alpha (allow unicode letters!!)      
    (?<text>    
             \b   ## use/require word boundary
            (?:  
                # opt 1 - start with alpha
                 \p{L}+    ## all unicode letters (e.g. [a-z])
                   |

                # opt 2 - start with num!! - allow special case (e.g. 1. FC) 
                     \d+  # check for num lookahead (MUST be space or dot)
                      ## MUST be followed by (optional dot) and
                      ##                      required space !!!
                      ## MUST be follow by a to z!!!!
                      \.?     ## optional dot
                      [ ]?   ## make space optional too  - why? why not?
                             ##  yes - eg. 1st, 2nd, 5th etc.
                       \p{L}+               
               )
        
              (?:(?:  (?:[ ]
                     (?! (awd|abd|ppd|n/p|w/o)[ ])    ## note - exclude (awd[ ]/abd[ ]/n/p[ ])
                       )  
                      |     # only single spaces allowed inline!!!
                     [-]                                              
                  )?
                (?:
                  \p{L}+ | [&/'.] 
                    |
                 (?:
                   \d+ 
                   (?![0-9.:'/+-])   
                   ## negative lookahead for numbers
                   ##   note - include digits itself!!!
                 )  
               )  
              )*  ## must NOT end with space or dash(-)
              ##  todo/fix - possible in regex here
              ##     only end in alphanum a-z0-9 (not dot or & ???)

              ## support (Hamburg) or such at the end (ony)
              ##   note - no numbers allowed inside () for now!!
             (?:
                  [ ]\(\p{L}+
                      (?:
                         (?: [ ] |
                             [-]
                          )? 
                          \p{L}+ | [&/'.]
                        )*
                      \)
             )?


            ## add lookahead/lookbehind
           ##    must be space!!! 
           ##   (or comma or  start/end of string)
           ##   kind of \b !!!
            ## positive lookahead
            ##  note - added : too - why? why not?
            (?=[ ,;@|:\[\]]
                 |$
            )
   )   
}ix
MINUTE_RE =
%r{
     (?<minute>
   \b
      \d{1,3}   
      '?   ## optional minute quote (')
      (?:       
         # optional offset/extra e.g. 45+ / 90+ or 45+10 / 90+5
          (?: \+
            (?: 
               (?! [0-9])   ## negative look ahead (not a number) required
               |    
              (?:
                \d{1,3} 
                '?   ## optional minute quote (')
                (?= (og|o|pen|p)? ([ ;,\]\)]|$))
              )
            )
          )
          |
          (?= (og|o|pen|p)? ([ ;,\]\)]|$))  # note - break can be og|pen|p too
   )
)}ix
GOAL_PEN_RE =

goal types

%r{
   (?<pen> 
        (?<=\d|\+|[ ]|')	## must follow a number or plus (e.g. 45p / 45+p / 45 p / 45'p) or space
            (?: pen|p )
            \b 
    )
}ix
GOAL_OG_RE =
%r{
   (?<og> 
        (?<=\d|\+|[ ]|')	## must follow a number or plus (e.g. 45og / 45+og / 45 og) or space
          (?: og|o )
          \b
   )
}ix
GROUP_RE =

Group A-Z, Group 1-99, Group HEX (used in CONCACAF World Cup qualifying), Group 1A or A1, B1 - used anywhere

use "key" of group - why? why not?
%r{(?<group>
     \b
    Group [ ]
       [a-z0-9]+
\b)}ix
ROUND_RE =
%r{(?<round>
            \b
   (?:
   # round  - note - requiers number e.g. round 1,2, etc.
(?:  (?: Round |
        Matchday |
        Week
     )
     [ ] [0-9]+
)
|
   # more (kockout) rounds
   # playoffs  - playoff, play-off, play-offs
  (?: Play-?offs? 
     (?: [ ]for[ ]quarter-?finals )?
  )
  |    
   # round32
  (?: Round[ ]of[ ]32 | 
      Last[ ]32 |
      16th[ ]finals |  
      1/16[ ]finals
      )
    |
   # round16   
  (?: Round[ ]of[ ]16 |
      Last[ ]16 | 
      8th[ ]finals |
      1/8[ ]finals 
      )
     |
   # fifthplace
   (?:
       (?: (Fifth|5th)[ -]place 
            (?: [ ] (?: match|play-?off|final ))?
        ) |
       (?: Match[ ]for[ ](?: fifth|5th )[ -]place )
   )
    |
   # thirdplace
    (?: 
        (?: (Third|3rd)[ -]place 
               (?: [ ] (?: match|play-?off|final ))? 
         ) |
        (?: Match[ ]for[ ](?: third|3rd )[ -]place ) 
     )
     |
   # quarterfinals
   (?:
        Quarter-?finals? |
        Quarters |
        Last[ ]8
    )
    |     
   # semifinals
  (?:   
       Semi-?finals? |
       Semis |
       Last[ ]4
  )
  |
   # final
   Finals? 
 )
\b)}ix
LEG_RE =

keep leg separate (from round) - why? why not?

%r{ (?<leg>
              \b
  (?:
   # leg1
 (?: 1st|First)[ ]legs? 
 |
  # leg2 
 (?: 2nd|Second)[ ]legs?
  )
\b)}ix
SCORE_RE =

e.g. 2-1

%r{      
  (?<score>
      (?<=[ ])	# Positive lookbehind for space
         (?<score1>\d{1,2}) - (?<score2>\d{1,2})
      (?=[ ])   # positive lookahead for space 
  )
}ix
SCORE_EXT_RE =
%r{ \[
    (?<score_ext>
        (?:       ## aet only e.g.  aet
           aet
           (?:   ##  optional pen
             [,;][ ]*
             \d{1,2}-\d{1,2} [ ]? pen\.? 
           )?
        )
        |
        (?:   ##  penalty only e.g. 3-2 pen
          \d{1,2}-\d{1,2} [ ]? pen\.?
        )
    )
  \]
}ix
SCORE_AWD_RE =

awd - awarded

%r{  ## must be space before and after!!!
    (?<score_awd>
      (?<=[ ])	# Positive lookbehind for space
        awd
       (?=[ ])   # positive lookahead for space 
    )
}ix
SCORE_ABD_RE =

abd - abandoned

%r{  ## must be space before and after!!!
    (?<score_abd>
      (?<=[ ])	# Positive lookbehind for space
        abd
       (?=[ ])   # positive lookahead for space 
    )
}ix
SCORE_PPD_RE =

ppd - postponed

%r{  ## must be space before and after!!!
    (?<score_ppd>
      (?<=[ ])	# Positive lookbehind for space
        ppd
       (?=[ ])   # positive lookahead for space 
    )
}ix
SCORE_NP_RE =

n/p - not played

%r{  ## must be space before and after!!!
    (?<score_np>
      (?<=[ ])	# Positive lookbehind for space
         n/p
       (?=[ ])   # positive lookahead for space 
    )
}ix
SCORE_WO_RE =

A walkover, also W.O. or w/o (originally two words: “walk over”),

is awarded to the opposing team/player etc,

if there are no other players available, or they have been disqualified, because the other contestants have forfeited or the other contestants have withdrawn from the contest.

w/o  - walk over
%r{  ## must be space before and after!!!
    (?<score_wo>
      (?<=[ ])	# Positive lookbehind for space
         w/o
       (?=[ ])   # positive lookahead for space 
    )
}ix

Instance Method Summary collapse

Instance Method Details

#log(msg) ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/rsssf/parser/token.rb', line 76

# Appends +msg+ followed by a newline to ./logs.txt.
# The file is opened in append mode with utf-8 as external encoding.
#
# @param msg [String] the message to record
def log( msg )
   ## note - ./errors.txt would be an alternative target - why? why not?
   File.open( './logs.txt', 'a:utf-8' ) do |out|
     out << msg
     out.write( "\n" )
   end
end

#parse(line, debug: false) ⇒ Object

convenience helper - ignore errors by default



92
93
94
95
# File 'lib/rsssf/parser/parser.rb', line 92

# Convenience wrapper around #parse_with_errors - hands back the nodes
# only and drops the collected errors.
#
# @param line  the line to parse (forwarded unchanged)
# @param debug [Boolean] forwarded unchanged to #parse_with_errors
# @return the first element of the pair returned by #parse_with_errors
def parse( line, debug: false )
  parse_with_errors( line, debug: debug ).first
end

#parse_with_errors(line, debug: false) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/rsssf/parser/parser.rb', line 20

# Tokenizes +line+ and groups the tokens into (parse tree/ast) nodes.
#
# Token sequences that get rewritten:
#   :text, (:score|:score_awd|:score_abd|:score_ppd|:score_np|:score_wo|:vs), :text
#       => [:team, text], <middle token as-is>, [:team, text]
#   :text, :minute
#       => [:player, text], <minute token as-is>
# Every other token is passed through unchanged.
#
# @param line  the line to tokenize and parse
# @param debug [Boolean] forwarded to #tokenize_with_errors
# @return [Array] pair of [nodes, errors]; errors are the ones reported
#   by #tokenize_with_errors
def parse_with_errors( line, debug: false )
    errors = []
    ## note - pass the debug flag on to the tokenizer
    ##         (it was accepted but silently dropped before)
    tokens, token_errors = tokenize_with_errors( line, debug: debug )
    errors += token_errors


=begin
#############
## pass 1 
##   replace all texts with keyword matches (e.g. group, round, leg, etc.)
     tokens = tokens.map do |t|
                      if t[0] == :text
                          text = t[1]
                          if is_group?( text )
                             ### expects to be followed by num (or text ABC??)
                             [:group, text]   
                          elsif is_matchday?( text )
                             ### expects to be followed by num
                             ##  use different name e.g. :fix_round or such?
                             [:matchday, text]   
                          elsif is_leg?( text )
                             [:leg, text]
                          elsif is_round?( text )
                             [:round, text]
                          else
                              t   ## pass through as-is (1:1)
                          end
                      else
                         t
                      end
                end


    ## puts "tokens:"
    ## pp tokens
=end

## transform tokens into (parse tree/ast) nodes    
    nodes = []
    
    ## note - (re)use token buffer from "standard" parser here !!!!
    buf = SportDb::Parser::Tokens.new( tokens )
    ## pp buf


    ## note - check for end of tokens BEFORE consuming;
    ##   the former loop/break-at-end form always ran once and
    ##   pushed the result of buf.next into nodes even if there were
    ##   no tokens at all (presumably nil - verify against Tokens#next)
    until buf.eos?
          if buf.match?( :text, [:score, 
                                 :score_awd,
                                 :score_abd,
                                 :score_ppd,
                                 :score_np,
                                 :score_wo,
                                 :vs], :text )
             nodes << [:team, buf.next[1]]
             nodes << buf.next
             nodes << [:team, buf.next[1]]
          elsif buf.match?( :text, :minute )    ## assume player+minute
             nodes << [:player, buf.next[1]]
             nodes << buf.next
          else
             ## pass through
             nodes << buf.next
          end
    end

    [nodes,errors]
end

#tokenize(line, debug: false) ⇒ Object

convenience helper - ignore errors by default



281
282
283
284
# File 'lib/rsssf/parser/token.rb', line 281

# Convenience wrapper around #tokenize_with_errors - hands back the
# tokens only and drops the collected errors.
#
# @param line  the line to tokenize (forwarded unchanged)
# @param debug [Boolean] forwarded unchanged to #tokenize_with_errors
# @return the first element of the pair returned by #tokenize_with_errors
def tokenize(  line, debug: false )
   tokenize_with_errors( line, debug: debug ).first
end

#tokenize_with_errors(line, debug: false) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
# File 'lib/rsssf/parser/token.rb', line 92

# Splits +line+ into tokens and collects a parse error for every run of
# text the active regex had to skip.
#
# Two regex modes are used: RE (standard) and INSIDE_RE (after an opening
# [ or ( was seen). The active regex is kept in @re between calls, so a
# bracket opened on one line and not closed keeps INSIDE_RE active for
# the next call. @sym_open / @sym_close remember which bracket style
# opened the inside mode; only the matching closing symbol switches back.
#
# Side effects: skipped text is reported via puts and #log; unexpected
# opening/closing brackets are reported via #log only. With debug: true
# the line, every match, the offsets and the remaining text get printed.
#
# @param line  [String] the line to tokenize
# @param debug [Boolean] print trace output
# @return [Array] pair of [tokens, errors]; tokens are arrays such as
#   [:text, "..."] or [:','], errors are message strings
def tokenize_with_errors( line, debug: false )
  tokens = []
  errors = []   ## keep a list of errors - why? why not?

  puts ">#{line}<"    if debug

  pos = 0
  ## track last offsets - to report error on no match 
  ##   or no match in end of string
  offsets = [0,0]
  m = nil

  ####
  ## quick hack - keep re state/mode between tokenize calls!!!
  @re  ||= RE     ## note - switch between RE & INSIDE_RE
  


  ## note - match( line, pos ) searches from pos onwards, that is,
  ##   the match may start AFTER pos (text in-between gets skipped)
  while m = @re.match( line, pos )
    if debug
      pp m
      puts "pos: #{pos}"  
    end
    offsets = [m.begin(0), m.end(0)]

    if offsets[0] != pos
      ## match NOT starting at start/begin position!!!
      ##  report parse error!!!

      ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE'  ## assume RE
      ## fix/change - use str.inspect to show tabs (\t)
      ##          and possibly other special characters causing trouble     
      msg =  "  !! WARN - parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]} in line >#{line}<"
      puts msg

      errors << "parse error (#{ctx}) - skipping >#{line[pos..(offsets[0]-1)]}< @#{offsets[0]},#{offsets[1]}"
      log( msg )
    end

    ##
    ## todo/fix - also check if possible
    ##   if no match but not yet end off string!!!!
    ##    report skipped text run too!!!

    ## continue after the end of the current match
    pos = offsets[1]

    pp offsets   if debug

    ## map the match to a token (or nil if nothing gets emitted);
    ##   the first named capture that is set (in the order below) wins
    t =  if @re == INSIDE_RE
           if m[:space]
             nil   ## skip space
           elsif m[:spaces]
             nil  ## skip spaces
           elsif m[:text] 
             [:text, m[:text]]   ## keep pos - why? why not?
           elsif m[:minute]
             [:minute, m[:minute]]
           elsif m[:score_at]
             [:score_at, m[:score_at]]
          elsif m[:og]
             [:og, m[:og]]    ## for typed drop - string version/variants
           elsif m[:pen]
             [:pen, m[:pen]]
           elsif m[:sym]
             sym = m[:sym]
             ## return symbols "inline" as is - why? why not?
             case sym
             when ',' then [:',']
             when ';' then [:';']
             when '@' then [:'@']
             when '|' then [:'|']   
             when '-' then [:'-']                
             when '[', '('
               if sym == @sym_open   
                 ## report error - already in inside mode!!!
                 ##  e.g. another [ in [] or ( in ()
                 log( "warn - unexpected (opening) #{sym} in inside (goal) mode in line >#{line}<" )
               end
               nil
             when ']', ')'   ## allow [] AND () for inside mode
               ## puts "  leave inside match mode"
               ## note - only the closing symbol matching the recorded
               ##   opening style switches back to standard mode
               if sym == @sym_close 
                   @re = RE
                   @sym_open  = nil  ## reset sym_open/close
                   @sym_close = nil
               end
               nil
             else
              nil  ## ignore others (e.g. brackets [])
             end
           else
             ## report error  - why? why not?
             nil
           end    
         else  ## assume standard mode/ctx
           if m[:space]
             nil   ## skip space
           elsif m[:spaces]
             nil  ## skip spaces
           elsif m[:text] 
             [:text, m[:text]]   ## keep pos - why? why not?
           elsif m[:note]
             [:note, m[:note]]
           elsif m[:group]
             [:group, m[:group]]
           elsif m[:round]
             [:round, m[:round]]
           elsif m[:leg]
             [:leg, m[:leg]]
           elsif m[:date]
             [:date, m[:date]]
           elsif m[:vs]
             [:vs, m[:vs]]
           elsif m[:score]
             [:score, m[:score]]
           elsif m[:score_awd]   # awarded (awd)
             [:score_awd, m[:score_awd]]
           elsif m[:score_abd]   # abandoned (abd)
             [:score_abd, m[:score_abd]]
           elsif m[:score_ppd]   # postponed (ppd)
             [:score_ppd, m[:score_ppd]]
           elsif m[:score_np]    # not played (n/p)
             [:score_np, m[:score_np]]
           elsif m[:score_wo]    # walk over (w/o)
             [:score_wo, m[:score_wo]]
           elsif m[:score_ext]
             [:score_ext, m[:score_ext]]
           elsif m[:sym]
             sym = m[:sym]
             ## return symbols "inline" as is - why? why not?
             case sym
             when ',' then [:',']
             when ';' then [:';']
             when '@' then [:'@']
             when '|' then [:'|']   
             when '[', '('
               ##  switch to inside mode!!!
               ## puts "  enter inside match mode"
               @re = INSIDE_RE
               @sym_open  =  sym      ## record open/close style - why? why not?
               @sym_close =  SYM_CLOSE[sym]
               nil
             when ']', ')'
               log( "warn - unexpected (closing) #{sym} in standard mode in line >#{line}<" )   
               ## already in standard mode/ctx
               ##  report warn/error - why? why not?
               nil
             else
               nil  ## ignore others (e.g. brackets [])
             end
           else
             ## report error  - why? why not?
             nil
           end
         end


    ## note - nil (e.g. spaces, brackets) gets dropped
    tokens << t    if t    

    if debug
      ## print the line with the consumed part starred out
      print ">"
      print "*" * pos
      puts "#{line[pos..-1]}<"
    end
  end


  ## check if no match in end of string
  ##   note - offsets still is [0,0] if there was no match at all,
  ##          that is, the complete (non-empty) line gets reported
  if offsets[1] != line.size

    ## note - report regex context
    ##  e.g.  RE or INSIDE_RE  to help debugging/troubleshooting format errors
    ctx = @re == INSIDE_RE ? 'INSIDE_RE' : 'RE'  ## assume RE
    ## fix/change - use str.inspect to show tabs (\t)
    ##          and possibly other special characters causing trouble     

    msg =  "  !! WARN - parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size} in line >#{line}<"
    puts msg
    log( msg )

    errors << "parse error (#{ctx}) - skipping >#{line[offsets[1]..-1]}< @#{offsets[1]},#{line.size}"
  end


  [tokens,errors] 
end