Module: DateFormats
- Defined in:
- lib/date-formats.rb,
lib/date-formats.rb,
lib/date-formats/names.rb,
lib/date-formats/parser.rb,
lib/date-formats/reader.rb,
lib/date-formats/formats.rb,
lib/date-formats/version.rb
Overview
todo/fix: make logging class configurable - lets you use logutils etc.
Defined Under Namespace
Modules: Logging Classes: DateParser, Reader, RsssfDateParser
Constant Summary collapse
- MONTH_EN =
helpers for building format regex patterns
build_names( MONTH_NAMES[:en] )
- DAY_EN =
build_names( DAY_NAMES[:en] )
- MONTH_FR =
build_names( MONTH_NAMES[:fr] )
- DAY_FR =
build_names( DAY_NAMES[:fr] )
- MONTH_ES =
build_names( MONTH_NAMES[:es] )
- DAY_ES =
build_names( DAY_NAMES[:es] )
- MONTH_PT =
build_names( MONTH_NAMES[:pt] )
- DAY_PT =
build_names( DAY_NAMES[:pt] )
- MONTH_DE =
build_names( MONTH_NAMES[:de] )
- DAY_DE =
build_names( DAY_NAMES[:de] )
- MONTH_IT =
build_names( MONTH_NAMES[:it] )
- DAY_IT =
build_names( DAY_NAMES[:it] )
- MONTH_NAMES =
note: always sort lines with longest words, abbreviations first!!!!
todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
{}
- DAY_NAMES =
{}
- DB__DATE_TIME_RE =
todo/fix: add rule with allowed / separator (e.g. 2019/12/11)
BUT must be used in all following cases too (NO mix'n'match allowed e.g. 2019-11/12)
/\b (?<year>\d{4}) - (?<month>\d{1,2}) - (?<day>\d{1,2}) \s+ (?<hours>\d{1,2}) [:.hH] (?<minutes>\d{2}) \b/x
- DB__DATE_RE =
e.g. 2012-09-14 => YYYY-MM-DD
note: allow 2012-9-3 e.g. no leading zero required
regex_db2
/\b (?<year>\d{4}) - (?<month>\d{1,2}) - (?<day>\d{1,2}) \b/x
- DD_MM_YYYY__DATE_TIME_RE =
e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM
note: allow 2.3.2012 e.g. no leading zero required; note: allow hour as 20.30
regex_de
/\b (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?<year>\d{4}) \s+ (?<hours>\d{1,2}) [:.hH] (?<minutes>\d{2}) \b/x
- DD_MM__DATE_TIME_RE =
e.g. 14.09. 20:30 => DD.MM. HH:MM
note: allow 2.3. e.g. no leading zero required; note: allow hour as 20.30 or 3.30 instead of 03.30
regex_de2
/\b (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. \s+ (?<hours>\d{1,2}) [:.hH] (?<minutes>\d{2}) \b/x
- DD_MM_YYYY__DATE_RE =
e.g. 14.09.2012 => DD.MM.YYYY regex_de3
/\b (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?<year>\d{4}) \b/x
- DD_MM__DATE_RE =
e.g. 14.09. => DD.MM. w/ implied year
note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too; note: we use a lookahead for the last part e.g. (?=\s+|$|[\]]) - do NOT consume
regex_de4 (use lookahead assert)
/\b (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?=\s+|$|[\]])/x
- EN__DD_MONTH_YYYY__DATE_TIME_RE =
e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM
or 12 May 2013 14h00
/\b (?<day>\d{1,2}) \s (?<month_name>#{MONTH_EN}) \s (?<year>\d{4}) \s+ (?<hours>\d{1,2}) [:hH] (?<minutes>\d{2}) \b/x
- EN__DD_MONTH__DATE_RE =
e.g. 12 May => D|DD.MMM w/ implied year
/\b (?<day>\d{1,2}) \s (?<month_name>#{MONTH_EN}) \b/x
- EN__DAY_MONTH_DD__DATE_RE =
e.g. Fri Aug/9 or Fri Aug 9
Fri, Aug/9 or Fri, Aug 9
/\b (?<day_name>#{DAY_EN}) ,? # note: allow optional comma \s (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) \b/x
- EN__DAY_MONTH_DD__DATE_TIME_RE =
e.g. Fri Aug/9 18:00 or Fri Aug 9 18:00
Fri, Aug/9 18:00 or Fri, Aug 9 18:00
/\b (?<day_name>#{DAY_EN}) ,? # note: allow optional comma \s (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) \s+ (?<hours>\d{1,2}) [:hH] (?<minutes>\d{2}) \b/x
- EN__MONTH_DD_YYYY__DATE_TIME_RE =
e.g. Jun/12 2011 14:00 or
Jun 12, 2011 14:00 or Jun 12, 2011 14h00
/\b (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) ,? # note: allow optional comma \s (?<year>\d{4}) \s+ (?<hours>\d{1,2}) [:hH] (?<minutes>\d{2}) \b/x
- EN__MONTH_DD__DATE_TIME_RE =
e.g. Jun/12 14:00 w/ implied year H|HH:MM
or Jun 12 14h00
/\b (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) \s+ (?<hours>\d{1,2}) [:hH] (?<minutes>\d{2}) \b/x
- EN__MONTH_DD_YYYY__DATE_RE =
e.g. Jun/12 2013
or Jun 12 2013 or Jun 12, 2013
/\b (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) ,? # note: allow optional comma \s (?<year>\d{4}) \b/x
- EN__MONTH_DD__DATE_RE =
check if [/ ] works!!!! in x mode ??
/\b (?<month_name>#{MONTH_EN}) (?: \/|\s ) (?<day>\d{1,2}) \b/x
- ES__DD_MONTH__DATE_RE =
e.g. 12 Ene w/ implied year
/\b (?<day>\d{1,2}) \s (?<month_name>#{MONTH_ES}) \b/x
- ES__DAY_DD_MONTH__DATE_RE =
e.g. Vie 12 Ene w/ implied year
/\b (?<day_name>#{DAY_ES}) \.? # note: make dot optional \s (?<day>\d{1,2}) \s (?<month_name>#{MONTH_ES}) \b/x
- ES__DAY_DD_MONTH__DATE_TIME_RE =
e.g. Sáb 5 Ene 19:30
/\b (?<day_name>#{DAY_ES}) \.? # note: make dot optional \s (?<day>\d{1,2}) \s (?<month_name>#{MONTH_ES}) \s+ (?<hours>\d{1,2}) [:hH] (?<minutes>\d{2}) \b/x
- ES__DAY_DD_MM__DATE_RE =
e.g. Vie. 16.8. or Sáb. 17.8.
or Vie 16.8. or Sáb 17.8.
/\b (?<day_name>#{DAY_ES}) \.? # note: make dot optional \s (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?=\s+|$|[\]])/x
- IT__DAY_MM_DD__DATE_RE =
e.g. Sab. 24.8. or Dom. 25.8.
or Sab 24.8. or Dom 25.8.
/\b (?<day_name>#{DAY_IT}) \.? # note: make dot optional \s (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?=\s+|$|[\]])/x
- FR__DAY_DD_MONTH__DATE_RE =
e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août] note: do NOT consume [] in regex (use lookahead assert)
/\b (?<day_name>#{DAY_FR}) \s+ (?<day>\d{1,2}) \.? # note: make dot optional \s+ (?<month_name>#{MONTH_FR}) \b/x
- PT__DD_MM_YYYY_DAY__DATE_RE =
e.g. 29/03/2003 - Sábado or
29/3/2003 Sábado
/\b (?<day>\d{1,2}) \/ (?<month>\d{1,2}) \/ (?<year>\d{4}) \s+ (?: -\s+ )? # note: make dash separator (-) optional (?<day_name>#{DAY_PT}) \b/x
- PT__DAY_DD_MONTH__DATE_RE =
e.g. Sáb, 13/Maio or Qui, 08/Junho
or Sáb 13 Maio or Qui 8 Junho
/\b (?<day_name>#{DAY_PT}) \.? # note: make dot optional ,? # note: allow optional comma too \s (?<day>\d{1,2}) (?: \/|\s ) (?<month_name>#{MONTH_PT}) \b/x
- PT__DAY_DD_MM__DATE_RE =
e.g. Sáb, 29/07 or Seg, 31/07
Sáb 29/07 or Seg 31/07
/\b (?<day_name>#{DAY_PT}) \.? # note: make dot optional ,? # note: allow optional comma too \s (?<day>\d{1,2}) \/ (?<month>\d{1,2}) \b/x
- DE__DAY_MM_DD__DATE_TIME_RE =
e.g. Sa., 16.5., 18.00 Uhr or Mo., 18.5., 20.30 Uhr
Sa 16.5. 18.00 or Mo 18.5. 20.30
/\b (?<day_name>#{DAY_DE}) \.? # note: make dot optional ,? # note: allow optional comma too [ ]* (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. ,? # note: allow optional comma too [ ]* (?<hours>\d{1,2}) \. (?<minutes>\d{2}) (?:[ ]* uhr )? ## note: allow optional Uhr (?=[ \]]|$)/ix
- DE__DAY_MM_DD__DATE_RE =
e.g. Fr. 26.7. or Sa. 27.7.
or Fr 26.7. or Sa 27.7. or Fr, 26.7. or Sa, 27.7.
/\b (?<day_name>#{DAY_DE}) \.? # note: make dot optional ,? # note: allow optional comma too \s (?<day>\d{1,2}) \. (?<month>\d{1,2}) \. (?=[ \]]|$)/x
- FORMATS_BASE =
map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served
[ ### all numbers (no month names or weekday) - find a better name? [ DB__DATE_TIME_RE, '[YYYY_MM_DD_hh_mm]' ], [ DB__DATE_RE, '[YYYY_MM_DD]' ], [ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ], [ DD_MM__DATE_TIME_RE, '[DD_MM_hh_mm]' ], [ DD_MM_YYYY__DATE_RE, '[DD_MM_YYYY]' ], [ DD_MM__DATE_RE, '[DD_MM]' ], ]
- FORMATS_EN =
[ [ EN__DAY_MONTH_DD__DATE_TIME_RE, '[EN_DAY_MONTH_DD_hh_mm]' ], [ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ], [ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ], [ EN__MONTH_DD__DATE_TIME_RE, '[EN_MONTH_DD_hh_mm]' ], [ EN__MONTH_DD_YYYY__DATE_RE, '[EN_MONTH_DD_YYYY]' ], [ EN__DAY_MONTH_DD__DATE_RE, '[EN_DAY_MONTH_DD]' ], [ EN__MONTH_DD__DATE_RE, '[EN_MONTH_DD]' ], [ EN__DD_MONTH__DATE_RE, '[EN_DD_MONTH]' ], ]
- FORMATS_FR =
[ [ FR__DAY_DD_MONTH__DATE_RE, '[FR_DAY_DD_MONTH]' ], ]
- FORMATS_ES =
[ [ ES__DAY_DD_MONTH__DATE_TIME_RE, '[ES_DAY_DD_MONTH_hh_mm]' ], [ ES__DAY_DD_MONTH__DATE_RE, '[ES_DAY_DD_MONTH]' ], [ ES__DD_MONTH__DATE_RE, '[ES_DD_MONTH]' ], [ ES__DAY_DD_MM__DATE_RE, '[ES_DAY_DD_MM]' ], ]
- FORMATS_PT =
[ [ PT__DD_MM_YYYY_DAY__DATE_RE, '[PT_DD_MM_YYYY_DAY]' ], [ PT__DAY_DD_MONTH__DATE_RE, '[PT_DAY_DD_MONTH]' ], [ PT__DAY_DD_MM__DATE_RE, '[PT_DAY_DD_MM]' ], ]
- FORMATS_DE =
[ [ DE__DAY_MM_DD__DATE_TIME_RE, '[DE_DAY_MM_DD_hh_mm]' ], [ DE__DAY_MM_DD__DATE_RE, '[DE_DAY_MM_DD]' ], ]
- FORMATS_IT =
[ [ IT__DAY_MM_DD__DATE_RE, '[IT_DAY_MM_DD]' ], ]
- FORMATS =
{ en: FORMATS_EN + FORMATS_BASE, fr: FORMATS_FR + FORMATS_BASE, es: FORMATS_ES + FORMATS_BASE, pt: FORMATS_PT + FORMATS_BASE, de: FORMATS_DE + FORMATS_BASE, it: FORMATS_IT + FORMATS_BASE, }
- MAJOR =
todo: namespace inside version or something - why? why not??
1
- MINOR =
0
- PATCH =
2
- VERSION =
[MAJOR,MINOR,PATCH].join('.')
Class Method Summary collapse
- .banner ⇒ Object
- .build_names(lines) ⇒ Object
- .find!(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object
- .lang ⇒ Object
- .lang=(value) ⇒ Object
- .parse(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object
- .parse_day(txt) ⇒ Object
- .parse_month(txt) ⇒ Object
-
.parser(lang:) ⇒ Object
find parser.
- .root ⇒ Object
- .version ⇒ Object
Class Method Details
.banner ⇒ Object
14 15 16 |
# File 'lib/date-formats/version.rb', line 14
# Builds a one-line banner naming this library's version and the running Ruby.
#
# @return [String] "date-formats/<VERSION> on Ruby <RUBY_VERSION> (<RUBY_RELEASE_DATE>) [<RUBY_PLATFORM>]"
def self.banner
  "date-formats/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
end
.build_names(lines) ⇒ Object
60 61 62 63 64 |
# File 'lib/date-formats/reader.rb', line 60
# Joins all names together into a single alternation string
# e.g. January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
#
# @param lines [Array<Array<String>>] rows of names (one row per month or day)
# @return [String] all names in the given order, separated by "|"
def self.build_names( lines )
  ## note: drop blank entries - an empty row would otherwise leave an
  ##   empty branch (e.g. "a||b") that matches the empty string once the
  ##   result gets interpolated into a format regex
  lines.flatten.reject { |name| name.to_s.empty? }.join('|')
end
.find!(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object
31 32 33 34 35 36 |
# File 'lib/date-formats/parser.rb', line 31
# Looks up the parser for +lang+ and hands +line+ (plus +start+) to its find! method.
#
# lang  - defaults to DateFormats.lang
#           todo/check: is there a "generic" like self.class.lang form?
# start - defaults to current YYYY.01.01. if no start provided
def self.find!( line,
                lang:  DateFormats.lang,
                start: Date.new( Date.today.year, 1, 1 ) )
  date_parser = parser( lang: lang )
  date_parser.find!( line, start: start )
end
.lang ⇒ Object
7 8 9 |
# File 'lib/date-formats/parser.rb', line 7
# Returns the current language setting; falls back to english (:en)
# (and remembers that choice) when nothing has been set yet.
def self.lang
  @@lang = :en   unless defined?( @@lang ) && @@lang
  @@lang
end
.lang=(value) ⇒ Object
10 11 12 13 |
# File 'lib/date-formats/parser.rb', line 10
# Sets the language setting.
#
# note: lang is always stored as a symbol for now (NOT a string)
# todo/check: remove =() method always returns passed in value? double check
def self.lang=( value )
  ## the assignment is the last expression, thus, the stored symbol is the result
  @@lang = value.to_sym
end
.parse(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object
24 25 26 27 28 29 |
# File 'lib/date-formats/parser.rb', line 24
# Looks up the parser for +lang+ and hands +line+ (plus +start+) to its parse method.
#
# lang  - defaults to DateFormats.lang
#           todo/check: is there a "generic" like self.class.lang form?
#             yes, module DateFormats needs to get changed to class DateFormats to work!!
# start - defaults to current YYYY.01.01. if no start provided
def self.parse( line,
                lang:  DateFormats.lang,
                start: Date.new( Date.today.year, 1, 1 ) )
  date_parser = parser( lang: lang )
  date_parser.parse( line, start: start )
end
.parse_day(txt) ⇒ Object
50 51 52 53 54 55 56 57 |
# File 'lib/date-formats/reader.rb', line 50
# Reads day names from +txt+ via Reader.parse and returns the parsed lines.
# Anything other than exactly 7 lines prints an error message and
# terminates the program with exit status 1.
def self.parse_day( txt )
  rows = Reader.parse( txt )
  return rows   if rows.size == 7

  puts "*** !!! ERROR !!! reading day names; got #{rows.size} lines - expected 7"
  exit 1
end
.parse_month(txt) ⇒ Object
41 42 43 44 45 46 47 48 |
# File 'lib/date-formats/reader.rb', line 41
# Reads month names from +txt+ via Reader.parse and returns the parsed lines.
# Anything other than exactly 12 lines prints an error message and
# terminates the program with exit status 1.
def self.parse_month( txt )
  rows = Reader.parse( txt )
  return rows   if rows.size == 12

  puts "*** !!! ERROR !!! reading month names; got #{rows.size} lines - expected 12"
  exit 1
end
.parser(lang:) ⇒ Object
find parser
16 17 18 19 20 21 22 |
# File 'lib/date-formats/parser.rb', line 16
# Finds the parser for +lang+; a parser gets built once per language and is
# then reused.
#
# note: lang is always turned into a symbol for now (NOT a string)
# note: cache all "built-in" lang versions (e.g. formats == nil)
def self.parser( lang: )
  key = lang.to_sym
  @@parser ||= {}

  cached = @@parser[ key ]
  return cached   if cached

  @@parser[ key ] = DateParser.new( lang: key )
end
.root ⇒ Object
18 19 20 |
# File 'lib/date-formats/version.rb', line 18
# Returns the absolute path of the directory reached by applying
# File.dirname three times to this source file's path.
#
# NOTE(review): the method name after `File.` was missing in this listing
#   (leaving `File.( ... )`, which is a `call` shorthand and fails at runtime);
#   restored as `expand_path` - confirm against lib/date-formats/version.rb
#
# @return [String] absolute directory path
def self.root
  File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
end
.version ⇒ Object
10 11 12 |
# File 'lib/date-formats/version.rb', line 10
# Returns the library version string held in the VERSION constant.
def self.version
  VERSION
end