Class: SportDb::DateFinder

Inherits:
DateFinderBase show all
Includes:
LogUtils::Logging
Defined in:
lib/sportdb/finders/date.rb

Constant Summary collapse

MONTH_FR =

todo: make more generic for reuse; fix: move to textutils

   e.g. date/fr.yml, en.yml, etc. ???
why? why not?
'Janvier|Janv|Jan|' +
'Février|Févr|Fév|' +
'Mars|Mar|' +
'Avril|Avri|Avr|' +
'Mai|'  +
'Juin|' +
'Juillet|Juil|' +
'Août|' +
'Septembre|Sept|' +
'Octobre|Octo|Oct|' +
'Novembre|Nove|Nov|' +
'Décembre|Déce|Déc'
# French weekday names and abbreviations as a regex alternation
# (interpolated into FR__WEEKDAY_DD_MONTH__DATE_REGEX).
#
# Longer forms come before their abbreviations so the longest form is tried first.
# Note: there is deliberately NO trailing '|' after the last entry - a trailing
# pipe adds an empty alternative, which lets the group match the empty string
# and makes the weekday silently optional.
WEEKDAY_FR =
'Lundi|Lun|L|' +
'Mardi|Mar|Ma|' +
'Mercredi|Mer|Me|' +
'Jeudi|Jeu|J|' +
'Vendredi|Ven|V|' +
'Samedi|Sam|S|' +
'Dimanche|Dim|D'
# English month names and abbreviations as a regex alternation
# (interpolated into the EN__* date patterns).
# Order is significant: each full name precedes its abbreviations.
MONTH_EN = %w[
  January Jan
  February Feb
  March Mar
  April Apr
  May
  June Jun
  July Jul
  August Aug
  September Sept Sep
  October Oct
  November Nov
  December Dec
].join('|')
# Spanish month names and abbreviations as a regex alternation
# (interpolated into ES__DD_MONTH__DATE_REGEX).
# Order is significant: longer forms precede shorter ones.
MONTH_ES = %w[
  Enero Ene
  Feb
  Marzo Mar
  Abril Abr
  Mayo May
  Junio Jun
  Julio Jul
  Agosto Ago
  Sept Set Sep
  Oct
  Nov
  Dic
].join('|')
DB__DATE_TIME_REGEX =

e.g. 2012-09-14 20:30 => YYYY-MM-DD HH:MM

nb: allow 2012-9-3 7:30 e.g. no leading zero required

regex_db

/\b
                 (?<year>\d{4})
 -
                 (?<month>\d{1,2})
 -
                 (?<day>\d{1,2})
\s+
                 (?<hours>\d{1,2})
 :
                 (?<minutes>\d{2})
\b/x
DB__DATE_REGEX =

e.g. 2012-09-14 w/ implied hours (set to 12:00)

nb: allow 2012-9-3 e.g. no leading zero required

regex_db2

/\b
                    (?<year>\d{4})
  -
                    (?<month>\d{1,2})
  -
                    (?<day>\d{1,2})
\b/x
DD_MM_YYYY__DATE_TIME_REGEX =

e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM

nb: allow 2.3.2012 e.g. no leading zero required
nb: allow hour as 20.30

regex_de

/\b
                          (?<day>\d{1,2})
\.
                          (?<month>\d{1,2})
\.
                          (?<year>\d{4})
\s+
                          (?<hours>\d{1,2})
[:.]
                          (?<minutes>\d{2})
\b/x
DD_MM__DATE_TIME_REGEX =

e.g. 14.09. 20:30 => DD.MM. HH:MM

nb: allow 2.3. e.g. no leading zero required
nb: allow hour as 20.30  or 3.30 instead of 03.30

regex_de2

/\b
                        (?<day>\d{1,2})
 \.
                        (?<month>\d{1,2})
 \.
 \s+
                        (?<hours>\d{1,2})
 [:.]
                        (?<minutes>\d{2})
\b/x
DD_MM_YYYY__DATE_REGEX =

e.g. 14.09.2012 => DD.MM.YYYY w/ implied hours (set to 12:00)

regex_de3

/\b
                    (?<day>\d{1,2})
\.
                    (?<month>\d{1,2})
\.
                    (?<year>\d{4})
\b/x
DD_MM__DATE_REGEX =

e.g. 14.09. => DD.MM. w/ implied year and implied hours (set to 12:00)

note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
note: we use a lookahead for last part e.g. (?=\s+|$|[\]]) - do NOT consume

regex_de4 (use lookahead assert)

/\b
(?<day>\d{1,2})
   \.
(?<month>\d{1,2})
   \.
(?=\s+|$|[\]])/x
EN__DD_MONTH_YYYY__DATE_TIME_REGEX =

e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM

/\b
                (?<day>\d{1,2})
 \s
                (?<month_en>#{MONTH_EN})
 \s
                (?<year>\d{4})
 \s+
                (?<hours>\d{1,2})
:
                (?<minutes>\d{2})
\b/x
EN__DD_MONTH__DATE_REGEX =

e.g. 12 May => D|DD.MMM w/ implied year and implied hours

/\b
                (?<day>\d{1,2})
\s
                (?<month_en>#{MONTH_EN})
\b/x
EN__MONTH_DD_YYYY__DATE_TIME_REGEX =

e.g. Jun/12 2011 14:00

/\b
                   (?<month_en>#{MONTH_EN})
\/
                   (?<day>\d{1,2})
\s
                   (?<year>\d{4})
\s+
                   (?<hours>\d{1,2})
:
                   (?<minutes>\d{2})
\b/x
EN__MONTH_DD__DATE_TIME_REGEX =

e.g. Jun/12 14:00 w/ implied year H|HH:MM

/\b
                   (?<month_en>#{MONTH_EN})
\/
                   (?<day>\d{1,2})
\s+
                   (?<hours>\d{1,2})
:
                   (?<minutes>\d{2})
\b/x
EN__MONTH_DD_YYYY__DATE_REGEX =

e.g. Jun/12 2013 w/ implied hours (set to 12:00)

/\b
                (?<month_en>#{MONTH_EN})
 \/
                (?<day>\d{1,2})
 \s
                (?<year>\d{4})
\b/x
EN__MONTH_DD__DATE_REGEX =

e.g. Jun/12 w/ implied year and implied hours (set to 12:00)

note: a space separator (e.g. Jun 12) was allowed too at one point (added for rsssf reader) -- check if it conflicts w/ other formats???
 -- fix: it might eat the french weekday, e.g. in mar 12 the mar is mardi !!! see FR__ pattern
fix: the space has been removed again for now - use a simple en date reader or something instead!!!
the separator was [\/ ] and has been changed back to \/
/\b
                   (?<month_en>#{MONTH_EN})
 \/
                   (?<day>\d{1,2})
\b/x
ES__DD_MONTH__DATE_REGEX =

e.g. 12 Ene w/ implied year and implied hours (set to 12:00)

/\b
                   (?<day>\d{1,2})
\s
                   (?<month_es>#{MONTH_ES})
\b/x
FR__WEEKDAY_DD_MONTH__DATE_REGEX =

e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août] note: do NOT consume [] in regex (use lookahead assert)

/\b
       (?:#{WEEKDAY_FR})   # note: skip weekday for now; do NOT capture
\s+
       (?<day>\d{1,2})
\.?        # note: make dot optional
\s+
       (?<month_fr>#{MONTH_FR})
(?=\s+|$|[\]])/x
FORMATS =

map table - 1) tag, 2) regex - note: order matters; first come-first matched/served

[
  [ '[YYYY_MM_DD_hh_mm]',        DB__DATE_TIME_REGEX         ],
  [ '[YYYY_MM_DD]',              DB__DATE_REGEX              ],
  [ '[DD_MM_YYYY_hh_mm]',        DD_MM_YYYY__DATE_TIME_REGEX ],
  [ '[DD_MM_hh_mm]',             DD_MM__DATE_TIME_REGEX ],
  [ '[DD_MM_YYYY]',              DD_MM_YYYY__DATE_REGEX ],
  [ '[DD_MM]',                   DD_MM__DATE_REGEX ],
  [ '[FR_WEEKDAY_DD_MONTH]',     FR__WEEKDAY_DD_MONTH__DATE_REGEX ],
  [ '[EN_DD_MONTH_YYYY_hh_mm]',  EN__DD_MONTH_YYYY__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_YYYY_hh_mm]',  EN__MONTH_DD_YYYY__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_hh_mm]',       EN__MONTH_DD__DATE_TIME_REGEX ],
  [ '[EN_MONTH_DD_YYYY]',        EN__MONTH_DD_YYYY__DATE_REGEX ],
  [ '[EN_MONTH_DD]',             EN__MONTH_DD__DATE_REGEX ],
  [ '[EN_DD_MONTH]',             EN__DD_MONTH__DATE_REGEX ],
  [ '[ES_DD_MONTH]',             ES__DD_MONTH__DATE_REGEX ]
]

Constants inherited from DateFinderBase

SportDb::DateFinderBase::MONTH_EN_TO_MM, SportDb::DateFinderBase::MONTH_ES_TO_MM, SportDb::DateFinderBase::MONTH_FR_TO_MM

Instance Method Summary collapse

Constructor Details

#initialize ⇒ DateFinder

Returns a new instance of DateFinder.



362
363
364
# File 'lib/sportdb/finders/date.rb', line 362

# Builds a DateFinder; there is no state to set up for now.
def initialize; end

Instance Method Details

#find!(line, opts = {}) ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
# File 'lib/sportdb/finders/date.rb', line 366

# Finds the first date (or date/time) in +line+, replaces the matched text
# in place with the format's tag (e.g. '[DD_MM]') and returns the parsed value.
#
# The patterns in FORMATS are tried in order; the first one that matches wins.
#
# line - String to search; it is MUTATED: the matched span is replaced by the tag.
# opts - options Hash passed through unchanged to parse_date_time.
#
# Returns whatever parse_date_time returns for the match, or nil if no
# pattern in FORMATS matches (line is left untouched in that case).
#
# NOTE(review): parse_date_time is not defined in this block - presumably
# inherited from DateFinderBase; confirm.
#
# todo/fix: use more lookahead for all required trailing spaces
# FIXME: a date in [] will become [[DATE.DE4]] - when getting removed it will
#   keep the ]; change the regex to \[[A-Z0-9.]\] plus add a unit test too
def find!( line, opts={} )
  FORMATS.each do |tag, pattern|
    md = pattern.match( line )
    next unless md   # no match; try next pattern

    # parse first, while the match data still refers to the unmodified line
    date = parse_date_time( md, opts )

    # Replace exactly the span the regex matched. A plain
    # line.sub!( md[0], tag ) would replace the first literal occurrence of
    # the matched text, which can sit earlier in the line at a position the
    # pattern itself rejected (e.g. because of \b or a lookahead).
    line[md.begin(0)...md.end(0)] = tag
    return date
  end

  nil  # no match found
end