Class: ScheduledUpdateFeedWorker

Inherits:
Object
  • Object
show all
Includes:
Sidekiq::Worker
Defined in:
app/workers/scheduled_update_feed_worker.rb

Overview

Background job for scheduled updates to a feed.

This is a Sidekiq worker

Instance Method Summary collapse

Instance Method Details

#perform(feed_id) ⇒ Object

Fetch and update entries for the passed feed. Receives as argument the id of the feed to be fetched.

If the feed does not exist, further refreshes of the feed are unscheduled. This keeps deleted feeds from having scheduled updates. Updates are also unscheduled if the feed has been marked as unavailable.

Every time a feed update runs, the unread entries count for each subscribed user is recalculated and corrected if necessary.

This method is intended to be invoked from Sidekiq, which means it is performed in the background.


26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# File 'app/workers/scheduled_update_feed_worker.rb', line 26

# Fetch and update entries for the passed feed.
#
# If the feed does not exist, or it has been marked as unavailable, further updates of the feed
# are unscheduled and nothing else is done.
#
# Errors listed in #feed_fetch_errors only mean that the feed could not be updated: they are
# logged and not re-raised. Any other error propagates to the caller.
#
# Whenever the feed still exists and is available when the method finishes (even after an error),
# its last_fetched timestamp is updated, its update interval is adjusted and the unread entries
# count of every subscribed user is recalculated.
#
# @param feed_id id of the feed to be fetched
# @return [void]
def perform(feed_id)
  # Initialize the number of entries in the feed before and after fetching up front, so the
  # variables can always be compared in the ensure clause, no matter how early an error is raised.
  entries_before = 0
  entries_after = 0

  # Look the feed up with a single query. A feed that is missing (or deleted concurrently) yields
  # nil here, instead of raising from a second lookup after a separate existence check.
  feed = Feed.find_by id: feed_id
  if feed.nil?
    Rails.logger.warn "Feed #{feed_id} scheduled to be updated, but it does not exist in the database. Unscheduling further updates."
    ScheduleManager.unschedule_feed_updates feed_id
    return
  end

  # Check that feed has not been marked as unavailable
  unless feed.available
    Rails.logger.warn "Feed #{feed_id} scheduled to be updated, but it has been marked as unavailable. Unscheduling further updates."
    ScheduleManager.unschedule_feed_updates feed_id
    return
  end

  Rails.logger.debug "Updating feed #{feed.id} - #{feed.title}"

  entries_before = feed.entries.count

  begin
    FeedClient.fetch feed
  rescue *feed_fetch_errors
    failing_for_long = feed.failing_since.present? &&
                       Time.zone.now - feed.failing_since > Feedbunch::Application.config.autodiscovery_after

    # If the feed has been failing for less than the configured autodiscovery_after value, re-raise the
    # error to be handled by the method-level rescue clause
    raise unless failing_for_long

    # If fetching from fetch_url has been failing for longer than the configured autodiscovery_after value, try to
    # perform autodiscovery (download the HTML document at feed.url and
    # try to get a <link> element pointing to a feed from its <head>; this should be the current fetch_url).
    #
    # This is intended for the case in which the owner of a feed changes its URL (e.g. migrating from a custom solution
    # to feedburner) but the website itself is still available at the old URL.
    #
    # Disable http caching so the most up to date version of the HTML is returned (in case an old version is
    # cached with the old RSS URL).
    FeedClient.fetch feed, http_caching: false, perform_autodiscovery: true
  end

  # The feed is checked again because it may no longer be in the database after fetching
  if feed_still_exists? feed
    feed = feed.reload

    entries_after = feed.entries.count

    # If the update didn't fail, mark the feed as "not currently failing"
    feed.update failing_since: nil unless feed.failing_since.nil?

    # Delete entries that are too old
    OldEntriesCleaner.cleanup feed
  end
rescue *feed_fetch_errors => e
  # All these errors mean the feed cannot be updated, but the job itself has not failed. Do not re-raise the error
  if feed_still_exists? feed
    feed = feed.reload
    # If this is the first update that fails, save the date&time the feed started failing
    feed.update! failing_since: Time.zone.now if feed.failing_since.nil?

    # If the feed has been failing for too long, mark it as unavailable
    if Time.zone.now - feed.failing_since > Feedbunch::Application.config.unavailable_after
      feed.update! available: false
    end
  end

  Rails.logger.warn "Error during scheduled update of feed #{feed_id} - #{feed&.fetch_url}: #{e.message}"
ensure
  # feed is nil here if the method returned early because the feed was not found
  if feed_still_exists?(feed) && feed.available
    # Update timestamp of the last time the feed was fetched
    Rails.logger.debug "Updating time of last update for feed #{feed.id} - #{feed.title}"
    feed.reload.update! last_fetched: Time.zone.now

    if entries_after > entries_before
      # If new entries have been fetched, decrement the fetch interval
      ScheduleManager.decrement_update_interval feed
    else
      # If no new entries have been fetched, increment the fetch interval
      ScheduleManager.increment_update_interval feed
    end

    # Update unread entries count for all subscribed users.
    feed.users.find_each do |user|
      SubscriptionsManager.recalculate_unread_count feed, user
    end
  end
end

# Whether the passed feed is not nil and a feed with its id is currently in the database.
#
# @param feed the feed to check; may be nil
# @return [Boolean]
private def feed_still_exists?(feed)
  !feed.nil? && Feed.exists?(feed.id)
end

# Error classes meaning that a feed could not be fetched, as opposed to the job itself failing.
# Kept in a single place so every rescue clause in #perform handles exactly the same errors.
#
# @return [Array<Class>]
private def feed_fetch_errors
  [
    RestClient::Exception,
    RestClient::RequestTimeout,
    SocketError,
    Net::HTTPBadResponse,
    Errno::ETIMEDOUT,
    Errno::ECONNREFUSED,
    Errno::EHOSTUNREACH,
    Errno::ECONNRESET,
    Zlib::GzipFile::Error,
    Zlib::DataError,
    OpenSSL::SSL::SSLError,
    EmptyResponseError,
    FeedAutodiscoveryError,
    FeedFetchError
  ]
end