Module: DateFormats

Defined in:
lib/date-formats.rb,
lib/date-formats.rb,
lib/date-formats/names.rb,
lib/date-formats/parser.rb,
lib/date-formats/reader.rb,
lib/date-formats/formats.rb,
lib/date-formats/version.rb

Overview

todo/fix: make logging class configurable - lets you use logutils etc.

Defined Under Namespace

Modules: Logging Classes: DateParser, Reader, RsssfDateParser

Constant Summary collapse

MONTH_EN =

helpers for building format regex patterns

build_names( MONTH_NAMES[:en] )
DAY_EN =
build_names( DAY_NAMES[:en] )
MONTH_FR =
build_names( MONTH_NAMES[:fr] )
DAY_FR =
build_names( DAY_NAMES[:fr] )
MONTH_ES =
build_names( MONTH_NAMES[:es] )
DAY_ES =
build_names( DAY_NAMES[:es] )
MONTH_PT =
build_names( MONTH_NAMES[:pt] )
DAY_PT =
build_names( DAY_NAMES[:pt] )
MONTH_DE =
build_names( MONTH_NAMES[:de] )
DAY_DE =
build_names( DAY_NAMES[:de] )
MONTH_IT =
build_names( MONTH_NAMES[:it] )
DAY_IT =
build_names( DAY_NAMES[:it] )
MONTH_NAMES =

note: always sort lines with longest words, abbreviations first!!!!

todo/fix: add/split into MONTH_NAMES and MONTH_ABBREVS (and DAY_NAMES and DAY_ABBREVS) - why? why not?
{}
DAY_NAMES =
{}
DB__DATE_TIME_RE =

todo/fix: add rule with allowed / separator (e.g. 2019/12/11)

BUT must be used in all following cases too (NO mix'n'match allowed e.g. 2019-11/12)
/\b
               (?<year>\d{4})
 -
               (?<month>\d{1,2})
 -
               (?<day>\d{1,2})
\s+
               (?<hours>\d{1,2})
 [:.hH]
               (?<minutes>\d{2})
\b/x
DB__DATE_RE =

e.g. 2012-09-14 => YYYY-MM-DD

note: allow 2012-9-3 e.g. no leading zero required

regex_db2

/\b
                  (?<year>\d{4})
  -
                  (?<month>\d{1,2})
  -
                  (?<day>\d{1,2})
\b/x
DD_MM_YYYY__DATE_TIME_RE =

e.g. 14.09.2012 20:30 => DD.MM.YYYY HH:MM

note: allow 2.3.2012 e.g. no leading zero required
note: allow hour as 20.30

regex_de

/\b
                        (?<day>\d{1,2})
\.
                        (?<month>\d{1,2})
\.
                        (?<year>\d{4})
\s+
                        (?<hours>\d{1,2})
[:.hH]
                        (?<minutes>\d{2})
\b/x
DD_MM__DATE_TIME_RE =

e.g. 14.09. 20:30 => DD.MM. HH:MM

note: allow 2.3. e.g. no leading zero required
note: allow hour as 20.30  or 3.30 instead of 03.30

regex_de2

/\b
                      (?<day>\d{1,2})
 \.
                      (?<month>\d{1,2})
 \.
 \s+
                      (?<hours>\d{1,2})
 [:.hH]
                      (?<minutes>\d{2})
\b/x
DD_MM_YYYY__DATE_RE =

e.g. 14.09.2012 => DD.MM.YYYY regex_de3

/\b
                  (?<day>\d{1,2})
\.
                  (?<month>\d{1,2})
\.
                  (?<year>\d{4})
\b/x
DD_MM__DATE_RE =

e.g. 14.09. => DD.MM. w/ implied year

note: allow end delimiter ] e.g. [Sa 12.01.] or end-of-string ($) too
note: we use a lookahead for last part e.g. (?:\s+|$|[\]]) - do NOT consume

regex_de4 (use lookahead assert)

/\b
(?<day>\d{1,2})
   \.
(?<month>\d{1,2})
   \.
(?=\s+|$|[\]])/x
EN__DD_MONTH_YYYY__DATE_TIME_RE =

e.g. 12 May 2013 14:00 => D|DD.MMM.YYYY H|HH:MM

or 12 May 2013 14h00
/\b
              (?<day>\d{1,2})
 \s
              (?<month_name>#{MONTH_EN})
 \s
              (?<year>\d{4})
 \s+
              (?<hours>\d{1,2})
[:hH]
              (?<minutes>\d{2})
\b/x
EN__DD_MONTH__DATE_RE =

e.g. 12 May => D|DD.MMM w/ implied year

/\b
              (?<day>\d{1,2})
\s
              (?<month_name>#{MONTH_EN})
\b/x
EN__DAY_MONTH_DD__DATE_RE =

e.g. Fri Aug/9 or Fri Aug 9

Fri, Aug/9 or Fri, Aug 9
/\b
     (?<day_name>#{DAY_EN})
,?   # note: allow optional comma
\s
     (?<month_name>#{MONTH_EN})
(?: \/|\s )
     (?<day>\d{1,2})
\b/x
EN__DAY_MONTH_DD__DATE_TIME_RE =

e.g. Fri Aug/9 18:00 or Fri Aug 9 18:00

Fri, Aug/9 18:00 or Fri, Aug 9 18:00
/\b
     (?<day_name>#{DAY_EN})
        ,?   # note: allow optional comma
        \s
     (?<month_name>#{MONTH_EN})
        (?: \/|\s )
     (?<day>\d{1,2})
\s+
     (?<hours>\d{1,2})
        [:hH]
     (?<minutes>\d{2})
\b/x
EN__MONTH_DD_YYYY__DATE_TIME_RE =

e.g. Jun/12 2011 14:00 or

Jun 12, 2011 14:00 or
Jun 12, 2011 14h00
/\b
                 (?<month_name>#{MONTH_EN})
(?: \/|\s )
                 (?<day>\d{1,2})
,?   # note: allow optional comma
\s
                 (?<year>\d{4})
\s+
                 (?<hours>\d{1,2})
[:hH]
                 (?<minutes>\d{2})
\b/x
EN__MONTH_DD__DATE_TIME_RE =

e.g. Jun/12 14:00 w/ implied year H|HH:MM

or  Jun 12 14h00
/\b
                 (?<month_name>#{MONTH_EN})
(?: \/|\s )
                 (?<day>\d{1,2})
\s+
                 (?<hours>\d{1,2})
[:hH]
                 (?<minutes>\d{2})
\b/x
EN__MONTH_DD_YYYY__DATE_RE =

e.g. Jun/12 2013

or Jun 12 2013
or Jun 12, 2013
/\b
              (?<month_name>#{MONTH_EN})
 (?: \/|\s )
              (?<day>\d{1,2})
 ,?   # note: allow optional comma
 \s
              (?<year>\d{4})
\b/x
EN__MONTH_DD__DATE_RE =

check if [/ ] works!!!! in x mode ??

/\b
                 (?<month_name>#{MONTH_EN})
 (?: \/|\s )
                 (?<day>\d{1,2})
\b/x
ES__DD_MONTH__DATE_RE =

e.g. 12 Ene w/ implied year

/\b
                 (?<day>\d{1,2})
\s
                 (?<month_name>#{MONTH_ES})
\b/x
ES__DAY_DD_MONTH__DATE_RE =

e.g. Vie 12 Ene w/ implied year

/\b
                 (?<day_name>#{DAY_ES})
\.?        # note: make dot optional
\s
                 (?<day>\d{1,2})
\s
                 (?<month_name>#{MONTH_ES})
\b/x
ES__DAY_DD_MONTH__DATE_TIME_RE =

e.g. Sáb 5 Ene 19:30

/\b
                 (?<day_name>#{DAY_ES})
\.?        # note: make dot optional
\s
                 (?<day>\d{1,2})
\s
                 (?<month_name>#{MONTH_ES})
\s+
                 (?<hours>\d{1,2})
[:hH]
                 (?<minutes>\d{2})
\b/x
ES__DAY_DD_MM__DATE_RE =

e.g. Vie. 16.8. or Sáb. 17.8.

or  Vie 16.8.  or  Sáb 17.8.
/\b
        (?<day_name>#{DAY_ES})
 \.?        # note: make dot optional
 \s
        (?<day>\d{1,2})
 \.
        (?<month>\d{1,2})
 \.
(?=\s+|$|[\]])/x
IT__DAY_MM_DD__DATE_RE =

e.g. Sab. 24.8. or Dom. 25.8.

or  Sab 24.8.  or Dom 25.8.
/\b
        (?<day_name>#{DAY_IT})
 \.?        # note: make dot optional
 \s
        (?<day>\d{1,2})
 \.
        (?<month>\d{1,2})
 \.
(?=\s+|$|[\]])/x
FR__DAY_DD_MONTH__DATE_RE =

e.g. Ven 8 Août or [Ven 8 Août] or Ven 8. Août or [Ven 8. Août] note: do NOT consume [] in regex (use lookahead assert)

/\b
     (?<day_name>#{DAY_FR})
\s+
     (?<day>\d{1,2})
\.?        # note: make dot optional
\s+
     (?<month_name>#{MONTH_FR})
\b/x
PT__DD_MM_YYYY_DAY__DATE_RE =

e.g. 29/03/2003 - Sábado or

29/3/2003 Sábado
/\b
 (?<day>\d{1,2})
    \/
 (?<month>\d{1,2})
    \/
 (?<year>\d{4})
    \s+
    (?: -\s+ )?   # note: make dash separator (-) optional
 (?<day_name>#{DAY_PT})
\b/x
PT__DAY_DD_MONTH__DATE_RE =

e.g. Sáb, 13/Maio or Qui, 08/Junho

or  Sáb 13 Maio or Qui 8 Junho
/\b
 (?<day_name>#{DAY_PT})
    \.?        # note: make dot optional
    ,?         # note: allow optional comma too
    \s
 (?<day>\d{1,2})
    (?: \/|\s )
 (?<month_name>#{MONTH_PT})
\b/x
PT__DAY_DD_MM__DATE_RE =

e.g. Sáb, 29/07 or Seg, 31/07

Sáb 29/07  or  Seg 31/07
/\b
 (?<day_name>#{DAY_PT})
    \.?        # note: make dot optional
    ,?         # note: allow optional comma too
    \s
 (?<day>\d{1,2})
    \/
 (?<month>\d{1,2})
\b/x
DE__DAY_MM_DD__DATE_TIME_RE =

e.g. Sa., 16.5., 18.00 Uhr or Mo., 18.5., 20.30 Uhr

Sa 16.5. 18.00         or  Mo 18.5. 20.30
/\b
        (?<day_name>#{DAY_DE})
 \.?        # note: make dot optional
 ,?         # note: allow optional comma too
 [ ]*
        (?<day>\d{1,2})
 \.
        (?<month>\d{1,2})
 \.
 ,?         # note: allow optional comma too
 [ ]*
         (?<hours>\d{1,2})
 \.
         (?<minutes>\d{2})
  (?:[ ]*
     uhr
  )?   ## note: allow optional Uhr
(?=[ \]]|$)/ix
DE__DAY_MM_DD__DATE_RE =

e.g. Fr. 26.7. or Sa. 27.7.

or  Fr 26.7.  or  Sa 27.7.
or  Fr, 26.7. or  Sa, 27.7.
/\b
        (?<day_name>#{DAY_DE})
 \.?        # note: make dot optional
 ,?         # note: allow optional comma too
 \s
        (?<day>\d{1,2})
 \.
        (?<month>\d{1,2})
 \.
(?=[ \]]|$)/x
FORMATS_BASE =

map tables - 1) regex, 2) tag - note: order matters; first come-first matched/served

[    ### all numbers (no month names or weekday) - find a better name?
  [ DB__DATE_TIME_RE,         '[YYYY_MM_DD_hh_mm]' ],
  [ DB__DATE_RE,              '[YYYY_MM_DD]'       ],
  [ DD_MM_YYYY__DATE_TIME_RE, '[DD_MM_YYYY_hh_mm]' ],
  [ DD_MM__DATE_TIME_RE,      '[DD_MM_hh_mm]'      ],
  [ DD_MM_YYYY__DATE_RE,      '[DD_MM_YYYY]'       ],
  [ DD_MM__DATE_RE,           '[DD_MM]'            ],
]
FORMATS_EN =
[
  [ EN__DAY_MONTH_DD__DATE_TIME_RE,  '[EN_DAY_MONTH_DD_hh_mm]'  ],
  [ EN__DD_MONTH_YYYY__DATE_TIME_RE, '[EN_DD_MONTH_YYYY_hh_mm]' ],
  [ EN__MONTH_DD_YYYY__DATE_TIME_RE, '[EN_MONTH_DD_YYYY_hh_mm]' ],
  [ EN__MONTH_DD__DATE_TIME_RE,      '[EN_MONTH_DD_hh_mm]'      ],
  [ EN__MONTH_DD_YYYY__DATE_RE,      '[EN_MONTH_DD_YYYY]'       ],
  [ EN__DAY_MONTH_DD__DATE_RE,       '[EN_DAY_MONTH_DD]'        ],
  [ EN__MONTH_DD__DATE_RE,           '[EN_MONTH_DD]'            ],
  [ EN__DD_MONTH__DATE_RE,           '[EN_DD_MONTH]'            ],
]
FORMATS_FR =
[
  [ FR__DAY_DD_MONTH__DATE_RE,       '[FR_DAY_DD_MONTH]' ],
]
FORMATS_ES =
[
  [ ES__DAY_DD_MONTH__DATE_TIME_RE,  '[ES_DAY_DD_MONTH_hh_mm]' ],
  [ ES__DAY_DD_MONTH__DATE_RE,       '[ES_DAY_DD_MONTH]' ],
  [ ES__DD_MONTH__DATE_RE,           '[ES_DD_MONTH]' ],
  [ ES__DAY_DD_MM__DATE_RE,          '[ES_DAY_DD_MM]' ],
]
FORMATS_PT =
[
  [ PT__DD_MM_YYYY_DAY__DATE_RE,     '[PT_DD_MM_YYYY_DAY]' ],
  [ PT__DAY_DD_MONTH__DATE_RE,       '[PT_DAY_DD_MONTH]' ],
  [ PT__DAY_DD_MM__DATE_RE,          '[PT_DAY_DD_MM]' ],
]
FORMATS_DE =
[
   [ DE__DAY_MM_DD__DATE_TIME_RE,    '[DE_DAY_MM_DD_hh_mm]' ],
   [ DE__DAY_MM_DD__DATE_RE,         '[DE_DAY_MM_DD]' ],
]
FORMATS_IT =
[
   [ IT__DAY_MM_DD__DATE_RE,          '[IT_DAY_MM_DD]' ],
]
FORMATS =
{
  en: FORMATS_EN + FORMATS_BASE,
  fr: FORMATS_FR + FORMATS_BASE,
  es: FORMATS_ES + FORMATS_BASE,
  pt: FORMATS_PT + FORMATS_BASE,
  de: FORMATS_DE + FORMATS_BASE,
  it: FORMATS_IT + FORMATS_BASE,
}
MAJOR =

todo: namespace inside version or something - why? why not??

1
MINOR =
0
PATCH =
2
VERSION =
[MAJOR,MINOR,PATCH].join('.')

Class Method Summary collapse

Class Method Details



14
15
16
# File 'lib/date-formats/version.rb', line 14

def self.banner
  ## one-line banner: library name/version followed by
  ##   the details of the running ruby (version, release date, platform)
  ruby_details = "Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  "date-formats/#{VERSION} on #{ruby_details}"
end

.build_names(lines) ⇒ Object



60
61
62
63
64
# File 'lib/date-formats/reader.rb', line 60

def self.build_names( lines )
  ## turn the list of name lines (each line an array of words) into
  ##  a single alternation string for use inside a regex pattern e.g.
  ##   January|Jan|February|Feb|March|Mar|April|Apr|May|June|Jun|...
  alternatives = lines.each_with_object( [] ) do |words, acc|
    acc << words.join( '|' )     ## all spellings of one name
  end
  alternatives.join( '|' )       ## all names together
end

.find!(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object



31
32
33
34
35
36
# File 'lib/date-formats/parser.rb', line 31

def self.find!( line,
                lang:  DateFormats.lang,    ## todo/check: is there a "generic" like self.class.lang form?
                start: Date.new( Date.today.year, 1, 1 ) ## note: default to current YYYY.01.01. if no start provided
              )
  ## convenience shortcut - look up the (cached) parser for lang
  ##   and pass line and start on to its find! method;
  ##   returns whatever the parser's find! returns
  date_parser = parser( lang: lang )
  date_parser.find!( line, start: start )
end

.langObject



7
8
9
# File 'lib/date-formats/parser.rb', line 7

def self.lang
  ## current default language; falls back to (and remembers) english (:en)
  ##   if nothing (truthy) has been set yet
  @@lang = :en  unless defined?( @@lang ) && @@lang
  @@lang
end

.lang=(value) ⇒ Object



10
11
12
13
# File 'lib/date-formats/parser.rb', line 10

def self.lang=( value )
  ## note: lang is always stored as a symbol for now (NOT a string);
  ##   the assignment itself is the last expression, so the stored symbol
  ##   is the method's own return value
  ##   (callers using the  DateFormats.lang = value  syntax get value back anyway)
  @@lang = value.to_sym
end

.parse(line, lang: DateFormats.lang, start: Date.new( Date.today.year, 1, 1 )) ⇒ Object



24
25
26
27
28
29
# File 'lib/date-formats/parser.rb', line 24

def self.parse( line,
                lang:    DateFormats.lang,    ## todo/check: is there a "generic" like self.class.lang form? yes, module DateFormats needs to get changed to class DateFormats to work!!
                start:   Date.new( Date.today.year, 1, 1 )  ## note: default to current YYYY.01.01. if no start provided
              )
  ## convenience shortcut - look up the (cached) parser for lang
  ##   and pass line and start on to its parse method;
  ##   returns whatever the parser's parse returns
  date_parser = parser( lang: lang )
  date_parser.parse( line, start: start )
end

.parse_day(txt) ⇒ Object



50
51
52
53
54
55
56
57
# File 'lib/date-formats/reader.rb', line 50

def self.parse_day( txt )
    ## read the day names from txt (via Reader.parse);
    ##   expects exactly 7 lines - one per weekday
    lines = Reader.parse( txt )
    return lines   if lines.size == 7

    ## anything else: report on stdout and terminate with exit status 1
    puts "*** !!! ERROR !!! reading day names; got #{lines.size} lines - expected 7"
    exit 1
end

.parse_month(txt) ⇒ Object



41
42
43
44
45
46
47
48
# File 'lib/date-formats/reader.rb', line 41

def self.parse_month( txt )
    ## read the month names from txt (via Reader.parse);
    ##   expects exactly 12 lines - one per month
    lines = Reader.parse( txt )
    return lines   if lines.size == 12

    ## anything else: report on stdout and terminate with exit status 1
    puts "*** !!! ERROR !!! reading month names; got #{lines.size} lines - expected 12"
    exit 1
end

.parser(lang:) ⇒ Object

find parser



16
17
18
19
20
21
22
# File 'lib/date-formats/parser.rb', line 16

def self.parser( lang: )  ## find parser
  key = lang.to_sym   ## note: lang is always looked up as a symbol for now (NOT a string)

  ## note: one DateParser per language gets built on first use
  ##   and is kept (cached) for all following calls
  @@parser ||= {}
  @@parser[ key ] ||= DateParser.new( lang: key )
end

.rootObject



18
19
20
# File 'lib/date-formats/version.rb', line 18

def self.root
  ## absolute path of the directory three levels up from this file
  ##   (that is, dirname applied three times to __FILE__)
  three_up = 3.times.reduce( __FILE__ ) { |path, _| File.dirname( path ) }
  File.expand_path( three_up )
end

.versionObject



10
11
12
# File 'lib/date-formats/version.rb', line 10

def self.version
  ## returns the VERSION constant as-is
  VERSION
end