Class: Embulk::Guess::TimeFormatGuess::GuessPattern

Inherits:
Object
  • Object
show all
Includes:
Parts
Defined in:
lib/embulk/guess/time_format_guess.rb

Constant Summary collapse

YMD =

yyyy-MM-dd

/(?<year>#{YEAR})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<day>#{DAY})/
YMD_NODELIM =
/(?<year>#{YEAR})(?<month>#{MONTH_NODELIM})(?<day>#{DAY_NODELIM})/
MDY =

MM/dd/yyyy

/(?<month>#{MONTH})(?<date_delim>#{date_delims})(?<day>#{DAY})\k<date_delim>(?<year>#{YEAR})/
MDY_NODELIM =
/(?<month>#{MONTH_NODELIM})(?<day>#{DAY_NODELIM})(?<year>#{YEAR})/
DMY =

dd.MM.yyyy

/(?<day>#{DAY})(?<date_delim>#{date_delims})(?<month>#{MONTH})\k<date_delim>(?<year>#{YEAR})/
DMY_NODELIM =
/(?<day>#{DAY_NODELIM})(?<month>#{MONTH_NODELIM})(?<year>#{YEAR})/
TIME =
/(?<hour>#{HOUR})(?:(?<time_delim>#{time_delims})(?<minute>#{MINUTE})(?:\k<time_delim>(?<second>#{SECOND})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?)?/
TIME_NODELIM =
/(?<hour>#{HOUR_NODELIM})(?:(?<minute>#{MINUTE_NODELIM})((?<second>#{SECOND_NODELIM})(?:(?<frac_delim>#{frac_delims})(?<frac>#{frac}))?)?)?/
ZONE =
/(?<zone_space> )?(?<zone>(?<zone_off>#{ZONE_OFF})|(?<zone_abb>#{ZONE_ABB}))/

Constants included from Parts

Parts::DAY, Parts::DAY_NODELIM, Parts::HOUR, Parts::HOUR_NODELIM, Parts::MINUTE, Parts::MINUTE_NODELIM, Parts::MONTH, Parts::MONTH_NAME_FULL, Parts::MONTH_NAME_SHORT, Parts::MONTH_NODELIM, Parts::WEEKDAY_NAME_FULL, Parts::WEEKDAY_NAME_SHORT, Parts::YEAR, Parts::ZONE_ABB, Parts::ZONE_OFF

Instance Method Summary collapse

Instance Method Details

#match(text) ⇒ Object



189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/embulk/guess/time_format_guess.rb', line 189

# Tries to read +text+ as a date, optionally followed by a time of day and
# a time zone, and records which parts were found and what separated them.
#
# The date must come first and is tried in the orders year-month-day,
# month-day-year and day-month-year; for each order the delimited pattern
# is tried before the delimiter-less one. The first pattern that matches
# wins.
#
# @param text [String] the sample value to analyse
# @return [GuessMatch, nil] a GuessMatch built from three parallel arrays
#   (delimiters, parts, part options), or nil when +text+ does not start
#   with a recognised date or has unrecognised trailing content
def match(text)
  delimiters = []
  parts = []
  part_options = []

  # The *_NODELIM patterns do not define every named group, and MatchData#[]
  # raises IndexError for an undefined group name. Check the group list
  # explicitly instead of rescuing, so unrelated errors are not swallowed.
  capture_or_blank = lambda do |md, name|
    md.names.include?(name) ? md[name] : ""
  end

  # [delimited pattern, delimiter-less pattern, order of the date parts]
  date_layouts = [
    [YMD, YMD_NODELIM, [:year, :month, :day]],
    [MDY, MDY_NODELIM, [:month, :day, :year]],
    [DMY, DMY_NODELIM, [:day, :month, :year]],
  ]

  dm = nil
  date_order = nil
  date_layouts.each do |delimited, undelimited, order|
    dm = /^#{delimited}(?<rest>.*?)$/.match(text) || /^#{undelimited}(?<rest>.*?)$/.match(text)
    if dm
      date_order = order
      break
    end
  end
  return nil unless dm

  date_delim = capture_or_blank.call(dm, "date_delim")
  date_order.each_with_index do |part, index|
    # No delimiter is recorded in front of the first date part.
    delimiters << date_delim unless index.zero?
    parts << part
    # The year carries no heading option; month and day do.
    part_options << (part == :year ? nil : part_heading_option(dm[part.to_s]))
  end
  rest = dm["rest"]

  # NOTE(review): "(:? " matches an optional colon followed by a space; it
  # may have been intended as the non-capturing group "(?: ". Left unchanged
  # because altering it would change which inputs are accepted.
  date_time_delims = /(:? |_|T|\. ?)/
  tm = /^(?<date_time_delim>#{date_time_delims})#{TIME}(?<rest>.*?)?$/.match(rest) ||
       /^(?<date_time_delim>#{date_time_delims})#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest) ||
       # A time glued directly to the date is only accepted when the date
       # itself was written without delimiters.
       (date_delim == "" && /^#{TIME_NODELIM}(?<rest>.*?)?$/.match(rest))
  if tm
    date_time_delim = capture_or_blank.call(tm, "date_time_delim")
    time_delim = capture_or_blank.call(tm, "time_delim")

    delimiters << date_time_delim
    parts << :hour
    part_options << part_heading_option(tm["hour"])

    if tm["minute"]
      delimiters << time_delim
      parts << :minute
      part_options << part_heading_option(tm["minute"])

      if tm["second"]
        delimiters << time_delim
        parts << :second
        part_options << part_heading_option(tm["second"])

        if tm["frac"]
          delimiters << tm["frac_delim"]
          parts << :frac
          # For the fraction the option is the number of characters matched.
          part_options << tm["frac"].size
        end
      end
    end

    rest = tm["rest"]
  end

  if zm = /^#{ZONE}$/.match(rest)
    delimiters << (zm["zone_space"] || '')
    parts << (zm["zone_off"] ? :zone_off : :zone_abb)
    part_options << nil

    GuessMatch.new(delimiters, parts, part_options)
  elsif rest =~ /^\s*$/
    # Nothing but whitespace is left after the date/time.
    GuessMatch.new(delimiters, parts, part_options)
  else
    # Unrecognised trailing content: not a timestamp this pattern understands.
    nil
  end
end

#part_heading_option(text) ⇒ Object



295
296
297
298
299
300
301
302
303
304
305
# File 'lib/embulk/guess/time_format_guess.rb', line 295

# Classifies how a numeric date/time part is written, judging by its first
# character and its length.
#
# @param text [String] the matched part, e.g. "07", " 7", "7" or "17"
# @return [Symbol, nil] :zero when it starts with "0", :blank when it
#   starts with a space, :none when it is a single other character, and
#   nil otherwise
def part_heading_option(text)
  case text[0]
  when '0' then :zero
  when ' ' then :blank
  else
    # Falls through to nil for anything longer than one character (or empty).
    :none if text.size == 1
  end
end