Class: Webhookdb::Replicator::IcalendarCalendarV1::EventProcessor

Inherits:
Object
  • Object
show all
Defined in:
lib/webhookdb/replicator/icalendar_calendar_v1.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io, upserter) ⇒ EventProcessor

Returns a new instance of EventProcessor.



331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 331

# Store the collaborators and reset all per-feed bookkeeping.
#
# @param io the feed source; lines are pulled from it with +gets+ in #each_feed_event.
# @param upserter the object that receives each event (see #process).
def initialize(io, upserter)
  @upserter = upserter
  @io = io
  # Running count of what we have consumed from the feed.
  # Content-Length headers on ical feeds can never be trusted, so we count ourselves.
  @read_bytes = 0
  # Identity of every row we upsert. Rows that are NOT in this list get
  # deleted (if recurring) or cancelled (if not recurring).
  # Should this turn out to be slow, the approach could be inverted:
  # pull down all IDs and pop from the set as we go.
  @upserted_identities = []
  # Every upserted recurring item, grouped by the UID of the event it came from.
  # When a later item carries a RECURRENCE-ID, we modify the matching item
  # of the sequence by stealing its compound identity.
  @expanded_events_by_uid = {}
  # How many events each UID spawned, so 'extra' recurring event rows
  # (those with a higher sequence number) can be deleted.
  @max_sequence_num_by_uid = {}
end

Instance Attribute Details

#read_bytes ⇒ Object (readonly)

Returns the value of attribute read_bytes.



329
330
331
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 329

# @return the running total kept in +@read_bytes+ (starts at 0 in #initialize).
def read_bytes
  return @read_bytes
end

#upserted_identities ⇒ Object (readonly)

Returns the value of attribute upserted_identities.



329
330
331
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 329

# @return the collection held in +@upserted_identities+ (an empty Array after #initialize).
def upserted_identities
  return @upserted_identities
end

Instance Method Details

#_ical_entry_from_ruby(r, entry, is_date) ⇒ Object

We need is_date because the recurrence/IceCube schedule may be using times, not dates.



504
505
506
507
508
509
510
511
512
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 504

# Build an ical entry hash (+{"v" => ...}+, plus +"TZID"+ when applicable) for the Ruby value +r+,
# using +entry+ as the template that decides how the value is expressed.
# We need is_date because the recurrence/IceCube schedule may be using times, not dates.
#
# @param r the date or time to render; must respond to +strftime+ (and +zone+/+getutc+ when not a date).
# @param entry [Hash] the original ical entry; its "TZID" and "v" keys are consulted.
# @param is_date [Boolean] when true, render a date-only value (+%Y%m%d+).
# @return [Hash] the new ical entry.
# @raise [KeyError] if +entry+ has no "v" key and no earlier rule applied.
# @raise [RuntimeError] if no representation can be chosen for +r+.
def _ical_entry_from_ruby(r, entry, is_date)
  return {"v" => r.strftime("%Y%m%d")} if is_date
  return {"v" => r.strftime("%Y%m%dT%H%M%SZ")} if r.zone == "UTC"
  tzid = entry["TZID"]
  return {"v" => r.strftime("%Y%m%dT%H%M%S"), "TZID" => tzid} if tzid
  # The template is expressed in UTC (trailing "Z") but +r+ is not labelled as UTC.
  # Render the instant +r+ represents in UTC; returning the template's own value here
  # would give every occurrence the same time as the original entry.
  # +getutc+ is used rather than +utc+ so the caller's Time is not converted in place.
  return {"v" => r.getutc.strftime("%Y%m%dT%H%M%SZ")} if entry.fetch("v").end_with?("Z")
  raise "Cannot create ical entry from: #{r}, #{entry}, is_date: #{is_date}"
end

#_icecube_rule_from_ical(ical) ⇒ Object



514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 514

# Parse an RRULE string into an IceCube rule, first removing rule parts IceCube rejects.
#
# We have seen certain ambiguous rules, like FREQ=WEEKLY with BYMONTHDAY=4.
# Apple interprets this as every 2 weeks; rrule.js interprets it as on the 4th of the month.
# IceCube errors, because `day_of_month` isn't valid on a WeeklyRule.
# In this case, we need to sanitize the string to remove the offending rule piece.
# There are probably many other offending formats, but we'll add them here as needed.
#
# @param ical [String] the RRULE value, like "FREQ=WEEKLY;INTERVAL=2". It is not modified.
# @return whatever IceCube::IcalParser.rule_from_ical returns for the (possibly sanitized) string.
def _icecube_rule_from_ical(ical)
  # BYMONTHDAY and BYYEARDAY values may be signed (e.g. -1 for "last day"),
  # so the value pattern must accept '-' and '+' as well as digits and commas.
  # Otherwise the part is not removed at all, or is removed only partially.
  offending_part =
    if ical.include?("FREQ=WEEKLY") && ical.include?("BYMONTHDAY=")
      /BYMONTHDAY=[-+\d,]+/
    elsif ical.include?("FREQ=MONTHLY") && ical.include?("BYYEARDAY=") && ical.include?("BYMONTHDAY=")
      # Another rule: FREQ=MONTHLY;INTERVAL=3;BYYEARDAY=14;BYMONTHDAY=14
      # Apple interprets this as monthly on the 14th; rrule.js interprets this as never happening.
      # 'day_of_year' isn't valid on a MonthlyRule, so delete the BYYEARDAY component.
      /BYYEARDAY=[-+\d,]+/
    end
  if offending_part
    # Collapse the ';;' left where the part was removed before trimming the ends,
    # so an input that already ended in ';' doesn't keep a dangling separator.
    ical = ical.gsub(offending_part, "").squeeze(";").delete_prefix(";").delete_suffix(";")
  end
  return IceCube::IcalParser.rule_from_ical(ical)
end

#_time_array(h) ⇒ Object



538
539
540
541
542
543
544
545
546
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 538

# Expand an ical entry whose "v" holds one or more comma-separated values
# into an array of parsed Ruby values.
#
# @param h [Hash] ical entry; every other key of +h+ is carried along with each individual value.
# @return [Array] one element per value: a Date is kept as-is, anything else has +utc+ called on it.
def _time_array(h)
  parser = Webhookdb::Replicator::IcalendarEventV1
  return h["v"].split(",").map do |single_value|
    parsed, _got_tz = parser.entry_to_date_or_datetime(h.merge("v" => single_value))
    # Convert to UTC. We don't work with ActiveSupport timezones in the icalendar code for the most part.
    parsed.is_a?(Date) ? parsed : parsed.utc
  end
end

#delete_condition ⇒ Object



351
352
353
354
355
356
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 351

# Build a Sequel condition matching the 'extra' rows of every recurring event seen so far:
# rows whose +recurring_event_id+ is a tracked UID and whose +recurring_event_sequence+
# is greater than the highest sequence number recorded for that UID.
#
# @return [nil] when no recurring UID has been tracked.
# @return the per-UID conditions OR-ed together otherwise.
def delete_condition
  return nil if @max_sequence_num_by_uid.empty?
  per_uid_conditions = @max_sequence_num_by_uid.map do |uid, max_sequence|
    belongs_to_uid = Sequel[recurring_event_id: uid]
    beyond_max = Sequel[:recurring_event_sequence] > max_sequence
    belongs_to_uid & beyond_max
  end
  return per_uid_conditions.reduce { |combined, cond| combined | cond }
end

#each_feed_event ⇒ Object



548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 548

# Read the feed from +@io+ line by line and yield one parsed hash per VEVENT block.
#
# Lines outside a BEGIN:VEVENT/END:VEVENT pair are ignored. A VEVENT without both
# DTSTART and UID is not yielded; its UID (or "[missing]") is collected instead,
# and all such UIDs are logged in a single warning once the feed is exhausted.
# +@read_bytes+ is advanced by the byte length of every line read, including line endings.
#
# @yieldparam h [Hash] the result of IcalendarEventV1.vevent_to_hash for one VEVENT.
# @return [nil]
def each_feed_event
  bad_event_uids = Set.new
  vevent_lines = []
  in_vevent = false
  while (line = @io.gets)
    # Count bytes, not characters: String#size under-counts any line
    # containing multibyte characters. Measure before stripping the line ending.
    @read_bytes += line.bytesize
    begin
      line.rstrip!
    rescue Encoding::CompatibilityError
      # We occasionally get incorrectly encoded files.
      # For example, the response may have a header:
      #   Content-Type: text/calendar; charset=UTF-8
      # but the actual encoding is not:
      #   file -I <filename>
      #   <filename>: text/calendar; charset=iso-8859-1
      # In these cases, there's not much we can do.
      # We can use chardet, but it's a big library and this issue
      # isn't common enough. Instead, try to force the encoding to utf-8,
      # which may break some things, but we'll see what happens.
      line = line.force_encoding("utf-8").scrub.rstrip
    end
    case line
      when "BEGIN:VEVENT"
        in_vevent = true
        vevent_lines << line
      when "END:VEVENT"
        in_vevent = false
        vevent_lines << line
        h = Webhookdb::Replicator::IcalendarEventV1.vevent_to_hash(vevent_lines)
        # Reuse the buffer for the next VEVENT.
        vevent_lines.clear
        if h.key?("DTSTART") && h.key?("UID")
          yield h
        else
          bad_event_uids << h.fetch("UID", {}).fetch("v", "[missing]")
        end
      else
        vevent_lines << line if in_vevent
    end
  end
  return if bad_event_uids.empty?
  @upserter.upserting_replicator.logger.warn("invalid_vevent_hash", vevent_uids: bad_event_uids.sort)
end

#each_projected_event(h) ⇒ Object

Raises:

  • (LocalJumpError)


372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 372

# Yield the event hash(es) that should be upserted for one parsed VEVENT hash.
#
# Three cases are handled, in this order:
#
# 1. +h+ has a RECURRENCE-ID: exactly one hash is yielded.
#    - If no expanded events are known for the UID, +h+ is yielded unchanged.
#    - If none of the known expanded events starts at the RECURRENCE-ID, +h+ is yielded
#      as a new member of the sequence (new UID, next sequence number).
#    - Otherwise +h+ takes over the UID and sequence number of the matching expanded event.
# 2. +h+ has no RRULE: +h+ is yielded unchanged.
# 3. +h+ has an RRULE: an IceCube schedule is built and one copy of +h+ is yielded per occurrence,
#    each with its own UID ("<uid>-<index>"), sequence number, DTSTART and (if present) DTEND.
#    A start year below 1000 or an end before the start yields +h+ once as non-recurring instead.
#
# Note that +h+ itself is modified in cases 1 and 3 (UID, recurring_event_id, etc. are assigned).
#
# @param h [Hash] parsed VEVENT. "UID" is required; "DTSTART" is required when an RRULE is present.
# @yieldparam event [Hash] an event hash to upsert.
# @return [nil]
# @raise [LocalJumpError] if no block is given.
def each_projected_event(h)
  raise LocalJumpError unless block_given?

  uid = h.fetch("UID").fetch("v")

  if (recurrence_id = h["RECURRENCE-ID"])
    # Track down the original item in the projected sequence, so we can update it.
    # The RECURRENCE-ID may be a date or a datetime, which decides the field we compare against.
    if Webhookdb::Replicator::IcalendarEventV1.value_is_date_str?(recurrence_id.fetch("v"))
      start = Webhookdb::Replicator::IcalendarEventV1.entry_to_date(recurrence_id)
      startfield = :start_date
    else
      startfield = :start_at
      start = Webhookdb::Replicator::IcalendarEventV1.entry_to_datetime(recurrence_id).first
    end
    # Only populated (by #process) for events upserted earlier in this same feed.
    candidates = @expanded_events_by_uid[uid]
    if candidates.nil?
      # We can have no recurring events, even with the exclusion date.
      # Not much we can do here- just treat it as a standalone event.
      yield h
      return
    end
    unless (match = candidates.find { |c| c[startfield] == start })
      # There are some providers (like Apple) where an excluded event
      # will be outside the bounds of the RRULE of its owner.
      # Usually the RRULE has an UNTIL that is before the RECURRENCE-ID datetime.
      #
      # In these cases, we can use the event as-is, but we need to
      # make sure it is treated as part of the sequence.
      # So increment the last-seen sequence number for the UID and use that.
      max_seq_num = @max_sequence_num_by_uid[uid] += 1
      h["UID"] = {"v" => "#{uid}-#{max_seq_num}"}
      h["recurring_event_id"] = uid
      h["recurring_event_sequence"] = max_seq_num
      yield h
      return
    end

    # Steal the UID to overwrite the original, and record where it came from.
    # Note that all other fields, like categories, will be overwritten with the fields in this exclusion.
    # This seems to be correct, but we should keep an eye open in case we need to merge
    # these exclusion events into the originals.
    h["UID"] = {"v" => match[:uid]}
    h["recurring_event_sequence"] = match[:recurring_event_sequence]
    # Usually the recurrent event and exclusion have the same last-modified.
    # But we need to set the last-modified to AFTER the original,
    # to make sure it replaces what's in the database (the original un-excluded event
    # may already be present in the database).
    h["LAST-MODIFIED"] = match.fetch(:last_modified_at) + 1.second
    yield h
    return
  end

  # Non-recurring events pass straight through.
  unless h["RRULE"]
    yield h
    return
  end

  # We need to convert relevant parsed ical lines back to a string for use in ice_cube.
  # There are other ways to handle this, but this is fine for now.
  ical_params = {}
  # NOTE: despite the local's name, these are RDATE entries (extra occurrence times), not exclusions.
  if (exdates = h["RDATE"])
    ical_params[:rtimes] = exdates.map { |d| self._time_array(d) }.flatten
  end
  if (exdates = h["EXDATE"])
    ical_params[:extimes] = exdates.map { |d| self._time_array(d) }.flatten
  end
  # The trailing condition is always true here: we returned above when there was no RRULE.
  ical_params[:rrules] = [self._icecube_rule_from_ical(h["RRULE"]["v"])] if h["RRULE"]
  # DURATION is not supported

  start_entry = h.fetch("DTSTART")
  ev_replicator = Webhookdb::Replicator::IcalendarEventV1
  is_date = ev_replicator.entry_is_date_str?(start_entry)
  # Use actual Times for start/end since ice_cube doesn't parse them well
  ical_params[:start_time] = ev_replicator.entry_to_date_or_datetime(start_entry).first
  if ical_params[:start_time].year < 1000
    # This is almost definitely a misconfiguration. Yield it as non-recurring and move on.
    yield h
    return
  end
  has_end_time = false
  if (end_entry = h["DTEND"])
    # the end date is optional. If we don't have one, we should never store one.
    has_end_time = true
    ical_params[:end_time] = ev_replicator.entry_to_date_or_datetime(end_entry).first
    if ical_params[:end_time] < ical_params[:start_time]
      # This is an invalid event. Not sure what it'll do to IceCube so don't send it there.
      # Yield it as a non-recurring event and move on.
      yield h
      return
    end
  end

  schedule = IceCube::Schedule.from_hash(ical_params)
  dont_project_before = Webhookdb::Icalendar.oldest_recurring_event
  dont_project_after = @upserter.now + RECURRENCE_PROJECTION

  # Just like google, track the original event id.
  # This is set on +h+ itself, so every copy made below (and +h+, if yielded from the rescue) carries it.
  h["recurring_event_id"] = uid
  # -1 means "no occurrence yielded"; it is what gets recorded if the loop below yields nothing.
  final_sequence = -1
  begin
    # Pass in a 'closing time' to avoid a denial of service for an impossible rrule.
    # It is further into the future than the "don't project after"
    # since using something too short causes the calculation to be short-circuited before it should
    # (I'm unclear what the ideal value is, but tests will fail with much less than the number here).
    # This still results in a slow calculation, but there's not much we can do for now.
    # In the future perhaps we should try to pre-validate common problems.
    # See spec for examples.
    dos_cutoff = dont_project_after + 210.days
    # +enumerate_occurrences+ is invoked through +send+, i.e. it is not called as a public method.
    schedule.send(:enumerate_occurrences, schedule.start_time, dos_cutoff).each_with_index do |occ, idx|
      # Occurrences that are too old are skipped but still consume an index,
      # so sequence numbers count from the schedule start rather than from the first yielded event.
      next if occ.start_time < dont_project_before
      # Given the original hash, we will modify some fields.
      # This is a shallow copy: only the top-level keys replaced below differ from +h+.
      e = h.dup
      # Keep track of how many events we're managing.
      e["recurring_event_sequence"] = idx
      # The new UID has the sequence number.
      e["UID"] = {"v" => "#{uid}-#{idx}"}
      e["DTSTART"] = self._ical_entry_from_ruby(occ.start_time, start_entry, is_date)
      e["DTEND"] = self._ical_entry_from_ruby(occ.end_time, end_entry, is_date) if has_end_time
      yield e
      final_sequence = idx
      # Checked after the yield, so the first occurrence past the cutoff is still yielded.
      break if occ.start_time > dont_project_after
    end
  rescue Date::Error
    # It's possible we yielded some recurring events too, in that case, treat them as normal,
    # in addition to yielding the event as non-recurring.
    yield h
  end
  # Remember the highest sequence number yielded for this UID; #delete_condition
  # uses it to match rows with a higher sequence number.
  @max_sequence_num_by_uid[uid] = final_sequence
  return
end

#process ⇒ Object



358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/webhookdb/replicator/icalendar_calendar_v1.rb', line 358

# Run the whole feed through the upserter.
#
# Every feed event is expanded via #each_projected_event and each resulting event is handed
# to the upserter. The returned identity is recorded, and rows that carry a
# +:recurring_event_id+ are grouped under that id in +@expanded_events_by_uid+.
# Pending inserts are flushed once the feed is exhausted.
#
# @return whatever the upserter's +flush_pending_inserts+ returns.
def process
  self.each_feed_event do |feed_event|
    self.each_projected_event(feed_event) do |projected|
      identity, row = @upserter.handle_item(projected)
      @upserted_identities.push(identity)
      recurring_uid = row.fetch(:recurring_event_id)
      # Only rows that belong to a recurring series are tracked for later RECURRENCE-ID lookups.
      next unless recurring_uid
      (@expanded_events_by_uid[recurring_uid] ||= []).push(row)
    end
  end
  @upserter.flush_pending_inserts
end