Class: Temporal::Parser

Inherits:
Object show all
Defined in:
lib/temporals/parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(expression) ⇒ Parser

Returns a new instance of Parser.



3
4
5
6
# File 'lib/temporals/parser.rb', line 3

# Builds a parser for the given expression.
#
# expression - any object; it is converted with #to_s and the resulting
#              string is duplicated before being stored in @expression.
def initialize(expression)
  # Work on our own copy so the caller's string is never mutated by later parsing steps.
  source_text = expression.to_s
  @expression = source_text.dup
end

Instance Method Details

#language_patterns_combined ⇒ Object



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/temporals/parser.rb', line 112

# Returns the token list after every entry of CommonPatterns has been
# collapsed by its handler in CommonPatternActions. The result is memoized
# in @language_patterns_combined.
#
# A shallow copy of #tokenized is used, so the handlers add/remove elements
# on the copy rather than on the memoized token array itself.
def language_patterns_combined
  return @language_patterns_combined if @language_patterns_combined

  combined = tokenized.dup

  # 3. Combine common language patterns
  if $DEBUG
    puts combined.inspect
    puts combined.map { |e| e[:type] }.inspect
  end

  # Keep sweeping over all patterns until a full sweep leaves the number of
  # tokens unchanged; one handler's output may enable another pattern.
  changed = true
  while changed
    length_before = combined.length
    CommonPatterns.each do |pattern|
      # Re-scan after every handler call because the handler edits the list in place.
      while (at = combined.map { |e| e[:type] }.includes_sequence?(pattern.split(/ /)))
        CommonPatternActions[pattern].call(combined, at)
      end
    end
    changed = combined.length != length_before
  end

  if $DEBUG
    puts combined.inspect
    puts combined.map { |e| e[:type] }.inspect
  end

  @language_patterns_combined = combined
end

#normalized ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/temporals/parser.rb', line 8

# Returns a cleaned-up copy of @expression for the tokenizer to work from.
# The result is memoized in @normalized; @expression itself is not modified.
#
# Steps, in order:
#   * every whitespace character, '+' and ',' becomes a single space
#   * '-', '&' and '|' are padded with spaces
#   * "time - time" / "time to time" is squeezed into "time-time"
#   * "in YY" / "in YYYY" is replaced by the 4-digit year (the "in " is consumed)
#   * a lone time containing ':' or 'm' is expanded into a short time range:
#     one minute long when minutes are given, one hour long otherwise
def normalized
  @normalized || begin
    normalized = @expression.dup
    # 1. Normalize the expression
    # TODO: re-create normalize: ' -&| ', 'time-time'
    # Inside the character class '+' is a literal plus sign, not a quantifier.
    normalized.gsub!(/[\s+,]/,' ')
    # Pad special characters with spaces for now
    normalized.gsub!(/([\-\&\|])/,' \1 ')
    # Get rid of spaces between time ranges
    normalized.gsub!(/(#{TimeRegexp}?) +(?:-+|to) +(#{TimeRegexp})/,'\1-\2')
    # Normalize to 4-digit years.
    # The 4-digit alternative is tried first and the match must end on a word
    # boundary, so a year such as 9001 is not mistaken for the 2-digit "90"
    # followed by stray digits. The leading \b keeps words that merely end in
    # "in" from triggering the rule.
    normalized.gsub!(/\bin (\d{4}|[09]\d)\b/) {
      y = $1
      y.length == 2 ? (y =~ /^0/ ? '20'+y : '19'+y) : y
    }
    # Normalize expressions of time
    normalized.gsub!(/(^| )(#{TimeRegexp})( |$)/i) {|s|
      # Capture the groups now; the matches below overwrite $1..$3.
      b = $1
      time = $2
      a = $3
      if s =~ /[:m]/ # If it really looks like a lone piece of time, it'll have either a am/pm or a ':' in it.
        # Converting a floating time into a timerange that spans the appropriate duration
        puts "Converting Time to TimeRange: #{time.inspect}" if $DEBUG
        # Figure out what precision we're at
        newtime = time + '-'
        if time =~ /(\d+):(\d+)([ap]m?|$)?/
          end_hr = $1.to_i
          end_mn = $2.to_i + 1
          meridiem = $3.to_s
          if end_mn > 59
            end_mn -= 60
            end_hr += 1
            # Rolling from 11:59 into 12:00 crosses noon/midnight, so am <-> pm.
            meridiem = meridiem.tr('ap', 'pa') if end_hr == 12
          end
          end_hr -= 12 if end_hr > 12
          # end-time is 1 minute later; minutes are zero-padded so 9:05 ends at 9:06, not 9:6
          newtime += "#{end_hr}:#{end_mn.to_s.rjust(2, '0')}#{meridiem}"
        elsif time =~ /(\d+)([ap]m?|$)?/
          end_hr = $1.to_i + 1
          meridiem = $2.to_s
          # Stepping from 11 to 12 crosses noon/midnight, so am <-> pm.
          meridiem = meridiem.tr('ap', 'pa') if end_hr == 12
          end_hr -= 12 if end_hr > 12
          newtime += "#{end_hr}#{meridiem}" # end-time is 1 hour later
        end
        puts "Converted! #{newtime}" if $DEBUG
        b+newtime+a
      else
        s
      end
    }
    puts "Normalized expression: #{normalized.inspect}" if $DEBUG
    @normalized = normalized
  end
end

#tokenized ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/temporals/parser.rb', line 58

# Splits the normalized expression on whitespace and turns each word into a
# token hash, memoizing the resulting array in @tokenized.
#
# Each token has a :type of 'ord', 'wday', 'year', 'month', 'union', 'range'
# or 'timerange' plus the data for that type. Words that match none of the
# WordTypes patterns are dropped.
#
# For a 'timerange', a missing am/pm on either side is inferred from the
# other side, or guessed from the start hour when both are missing, and both
# times are returned ending in "am"/"pm".
def tokenized
  @tokenized || begin
    # 2. Tokenize distinct pieces (words) in the expression
    words = normalized.split(/\s+/)
    puts words.inspect if $DEBUG
    tokenized = words.map do |word|
      # An unmatched word makes the case yield nil, which compact removes below.
      case word
      when WordTypes[:ord]
        {:type => 'ord', :ord => $1}
      when WordTypes[:word_ord]
        ord = WordOrds.include?(word.downcase) ? WordOrds.index(word.downcase)+1 : 'last'
        puts "WordOrd: #{ord}" if $DEBUG
        {:type => 'ord', :ord => ord}
      when WordTypes[:wday]
        {:type => 'wday', :wday => WDay.new($1)}
      when WordTypes[:year]
        {:type => 'year', :year => word}
      when WordTypes[:month]
        {:type => 'month', :month => Month.new(word)}
      when WordTypes[:union]
        {:type => 'union'}
      when WordTypes[:range]
        {:type => 'range'}
      when WordTypes[:timerange]
        # determine and inject am/pm
        start_at = $1
        end_at = $2
        start_at_p = start_at[/([ap])m?$/, 1]
        end_at_p = end_at[/([ap])m?$/, 1]
        # The hours are used only for > & < comparisons. On a 12-hour clock
        # 12 comes before 1, so it is folded to 0 on both sides.
        start_hr = start_at.split(/:/)[0].to_i
        start_hr = 0 if start_hr == 12
        end_hr = end_at.split(/:/)[0].to_i
        end_hr = 0 if end_hr == 12
        if start_at_p && !end_at_p
          # If end-time is a lower hour number than start-time, then we've crossed noon or midnight, and the end-time a/pm should be opposite.
          end_at = end_at + (start_hr <= end_hr ? start_at_p : (start_at_p=='a' ? 'p' : 'a'))
        elsif end_at_p && !start_at_p
          # If end-time is a lower hour number than start-time, then we've crossed noon or midnight, and the start-time a/pm should be opposite.
          start_at = start_at + (start_hr <= end_hr ? end_at_p : (end_at_p=='a' ? 'p' : 'a'))
        elsif !end_at_p && !start_at_p
          # If neither had am/pm attached, assume pm for 12 and 1-7, am for 8-11.
          start_at_p = (start_hr < 8 ? 'p' : 'a')
          start_at = start_at + start_at_p
          # If end-time is a lower hour number than start-time, then we've crossed noon or midnight, and the end-time a/pm should be opposite.
          end_at = end_at + (start_hr <= end_hr ? start_at_p : (start_at_p=='a' ? 'p' : 'a'))
        end
        # A bare "a"/"p" suffix is completed to "am"/"pm".
        start_at += 'm' unless start_at =~ /m$/
        end_at += 'm' unless end_at =~ /m$/
        {:type => 'timerange', :start_time => start_at, :end_time => end_at}
      end
    end.compact
    @tokenized = tokenized
  end
end

#yielded ⇒ Object



140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/temporals/parser.rb', line 140

# Reduces the combined tokens to a single object and memoizes it in @yielded.
#
# Every token whose :type is not 'union' or 'range' is wrapped with
# Temporal.new; the handlers in BooleanPatternActions are then applied for
# each entry of BooleanPatterns until the list cannot be reduced further.
#
# Raises RuntimeError if more than one element is left afterwards.
def yielded
  return @yielded if @yielded

  # Shallow copy: elements are replaced in this list, not in the memoized one.
  pieces = language_patterns_combined.dup

  # What remains should be simply sections of Set logic
  # 4. Parse Set logic
  pieces.map! do |piece|
    piece[:type].in?('union', 'range') ? piece : Temporal.new(piece)
  end

  BooleanPatterns.each do |pattern|
    # Re-scan after each handler call since the handler rewrites the list in place.
    while (at = pieces.map { |e| e[:type] }.includes_sequence?(pattern.split(/ /)))
      BooleanPatternActions[pattern].call(pieces, at)
      break if pieces.length == 1
    end
  end

  # More than one leftover element means the expression could not be fully
  # understood; a successful parse condenses everything into a single object.
  unless pieces.length <= 1
    raise RuntimeError, "Could not parse Temporal Expression: check to make sure it is clear and has only one possible meaning to an English-speaking person."
  end

  @yielded = pieces[0]
end