Class: AnyStyle::Normalizer::Date

Inherits:
AnyStyle::Normalizer
Defined in:
lib/anystyle/normalizer/date.rb

Instance Attribute Summary

Attributes inherited from AnyStyle::Normalizer

#keys, #skip

Instance Method Summary

Methods inherited from AnyStyle::Normalizer

#append, #detect_language, #detect_scripts, #doi_extract, #each_value, #initialize, #keys_for, #map_values, #name, #skip?

Constructor Details

This class inherits a constructor from AnyStyle::Normalizer

Instance Method Details

#approximate?(date) ⇒ Boolean

Returns:

  • (Boolean)

49
50
51
# File 'lib/anystyle/normalizer/date.rb', line 49

# Tests whether the date string carries an "approximately" marker:
# a word starting with circa, ca., vers or approx, or a leading
# "c." / "v." at the start of a line (case-insensitive).
#
# @param date [String] the raw date text
# @return [Integer, nil] index of the first marker, or nil if none is found
def approximate?(date)
  approximation_marker = /(\b(circa|ca\.|vers|approx))|(^[cv]\.)/i
  date =~ approximation_marker
end

#extract_day(date) ⇒ Object


69
70
71
72
73
74
75
# File 'lib/anystyle/normalizer/date.rb', line 69

# Extracts the first stand-alone day-of-month number (1..31) from the
# date string and returns it zero-padded to two digits.
#
# Only one- or two-digit numbers delimited by word boundaries are
# considered, so the digits of a four-digit year are never picked up.
# A bare "0" or "00" is not a valid day and is ignored rather than
# being turned into the impossible day "00".
#
# @param date [String] the raw date text
# @return [String, nil] the day as "01".."31", or nil if none is found
def extract_day(date)
  # 1-9 (optionally zero-padded), 10-29, or 30-31.
  return unless date =~ /\b(0?[1-9]|[12]\d|3[01])\b/

  format('%02d', Regexp.last_match(1).to_i)
end

#extract_month_by_name(date) ⇒ Object


77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/anystyle/normalizer/date.rb', line 77

# Maps a month name (or abbreviation) found in the date string to its
# two-digit month number.
#
# Patterns are tried in calendar order and the first one that matches
# wins. Each pattern is a case-insensitive word prefix; several accept
# alternative spellings (e.g. "feb"/"fév", "apr"/"avr", "oct"/"okt").
#
# @param date [String] the raw date text
# @return [String, nil] "01".."12", or nil when no month name is found
def extract_month_by_name(date)
  month_patterns = [
    [/\bjan/i,       '01'],
    [/\bf(eb|év)/i,  '02'],
    [/\bmar/i,       '03'],
    [/\ba[pv]r/i,    '04'],
    [/\bma[yi]/i,    '05'],
    [/\bjui?n/i,     '06'],
    [/\bjui?l/i,     '07'],
    [/\ba(ug|oût)/i, '08'],
    [/\bsep/i,       '09'],
    [/\bo[ck]t/i,    '10'],
    [/\bnov/i,       '11'],
    [/\bd[eé]c/i,    '12']
  ]

  # `===` keeps the same matching semantics as a `case`/`when` on the patterns.
  _pattern, number = month_patterns.find { |pattern, _| pattern === date }
  number
end

#extract_uncertainty(date) ⇒ Object


53
54
55
56
57
58
59
# File 'lib/anystyle/normalizer/date.rb', line 53

# Derives a one-character qualifier from the approximate/uncertain
# markers present in the date string:
#
#   approximate and uncertain -> '%'
#   approximate only          -> '~'
#   uncertain only            -> '?'
#   neither                   -> nil
#
# @param date [String] the raw date text
# @return [String, nil] the qualifier, or nil when the date has neither marker
def extract_uncertainty(date)
  approximate = approximate?(date)
  uncertain = uncertain?(date)

  return (approximate ? '%' : '?') if uncertain

  '~' if approximate
end

#extract_year(date) ⇒ Object


61
62
63
64
65
66
67
# File 'lib/anystyle/normalizer/date.rb', line 61

# Pulls the first four-digit run starting with 0, 1 or 2 out of the
# date string and returns it as the year.
#
# NOTE: the surrounding `\D?` groups are optional, so they do not
# actually require a non-digit neighbour; a longer digit run such as
# "12345" still yields its first four digits.
#
# @param date [String] the raw date text
# @return [String, nil] the four-digit year, or nil if none is found
def extract_year(date)
  Regexp.last_match(1) if date =~ /\D?([012]\d\d\d)\D?/
end

#interval?(date) ⇒ Boolean

Returns:

  • (Boolean)

37
38
39
# File 'lib/anystyle/normalizer/date.rb', line 37

# Tests whether the date string looks like an interval. It does when it
# contains any of:
#
#   * a slash,
#   * a dash (any Unicode dash punctuation) surrounded by whitespace,
#   * whitespace followed by a one- or two-digit number and a dash,
#     optionally followed by a second number (e.g. " 12-15").
#
# @param date [String] the raw date text
# @return [Integer, nil] index of the first match, or nil if none is found
def interval?(date)
  interval_marker = /\/|\s\p{Pd}\s|(\s([12]?\d|30)\p{Pd}([12]?\d|3[01])?)/
  date =~ interval_marker
end

#iso?(date) ⇒ Boolean

Returns:

  • (Boolean)

33
34
35
# File 'lib/anystyle/normalizer/date.rb', line 33

# Tests whether the date string contains a YYYY-MM-DD style date, where
# the year starts with 0, 1 or 2. The pattern is not anchored, so the
# date may appear anywhere in the string.
#
# @param date [String] the raw date text
# @return [Integer, nil] index of the first match, or nil if none is found
def iso?(date)
  iso_date = /[012]\d\d\d-\d\d-\d\d/
  date =~ iso_date
end

#normalize(item, **opts) ⇒ Object


6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/anystyle/normalizer/date.rb', line 6

# Normalizes the date values of +item+ by handing a block to the
# inherited +map_values+ helper. For each value the block yields:
#
#   * 'XXXX' when the value is flagged as unknown,
#   * the value unchanged when it is an interval or already contains
#     an ISO-style date,
#   * the value unchanged when no year can be extracted,
#   * otherwise "YEAR[-MONTH[-DAY]]" followed by the uncertainty
#     qualifier, if any. The day is only looked up when a month name
#     was found.
#
# @param item [Object] the item passed through to +map_values+
# @param opts [Hash] accepted but not used by this method
# @return [Object] whatever +map_values+ returns
def normalize(item, **opts)
  map_values(item) do |_, value|
    next 'XXXX' if unknown?(value)

    # TODO: AD/BC
    # TODO: Seasons
    next value if interval?(value) || iso?(value)

    year = extract_year(value)
    next value if year.nil?

    month = extract_month_by_name(value)
    day = month.nil? ? nil : extract_day(value)

    date_parts = [year, month, day].compact.join('-')
    qualifier = extract_uncertainty(value)

    # A nil qualifier interpolates as the empty string.
    "#{date_parts}#{qualifier}"
  end
end

#uncertain?(date) ⇒ Boolean

Returns:

  • (Boolean)

45
46
47
# File 'lib/anystyle/normalizer/date.rb', line 45

# Tests whether the date string contains a question mark.
#
# @param date [String] the raw date text
# @return [Integer, nil] index of the first "?", or nil if there is none
def uncertain?(date)
  question_mark = /\?/
  date =~ question_mark
end

#unknown?(date) ⇒ Boolean

Returns:

  • (Boolean)

41
42
43
# File 'lib/anystyle/normalizer/date.rb', line 41

# Tests whether the date string states that the date is not known:
# "inconnue", "unknown", "unbekannt", "no date", or an abbreviation of
# the form "n.d" / "s.d" (with an optional space after the period).
# Matching is case-insensitive.
#
# @param date [String] the raw date text
# @return [Integer, nil] index of the first match, or nil if none is found
def unknown?(date)
  unknown_marker = /inconnue|unknown|unbekannt|[ns]\. ?d\b|no date/i
  date =~ unknown_marker
end