Class: Feed

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
lib/feed.rb

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.create_feed(xml, feed_url, &block) ⇒ Object

Takes the feed XML and the URL it was fetched from, and creates a feed object and subscription.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/feed.rb', line 11

# Parses +xml+ and returns the Feed record stored under +feed_url+, creating
# the record (and importing its entries) when it is not in the database yet.
#
# Progress messages are passed to the optional block; without a block they are
# printed to STDOUT. The block is also handed to the returned record so that
# the messages produced while importing entries reach it as well. (Storing the
# block in an instance variable of the class had no effect: nothing read it,
# and a bare +puts+ inside a class method is Kernel#puts.)
#
# xml      - the raw feed document.
# feed_url - the address the feed was fetched from. It is used as given for
#            the database lookup and stored stripped.
#
# Returns the existing or newly created Feed, or nil when the parse result is
# not a FeedNormalizer::Feed.
def self.create_feed(xml, feed_url, &block)
  emit = block || lambda { |message| STDOUT.puts(message) }

  feed = FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
  return nil unless feed.is_a?(FeedNormalizer::Feed)

  emit.call("Looking for #{feed_url} in the database")
  if (found_feed = Feed.find_by_feed_url(feed_url))
    emit.call("Feed already exists")

    # Update it, then report how many entries were newly imported.
    found_feed.instance_variable_set(:@output_block, block) if block
    emit.call(found_feed.import_entries(feed).to_s)

    return found_feed
  end
  emit.call("Not found. Subscribing.")

  new_feed = Feed.create(:feed_id => feed.id,
              # A feed without a title must not abort the subscription.
              :title => feed.title && feed.title.strip,
              # It's very important that this is feed_url and not feed.url:
              :feed_url => feed_url.strip,
              :urls => feed.urls.map { |x| x.strip },
              :parser => feed.parser,
              :last_updated => feed.last_updated || Time.now,
              :authors => feed.authors,
              :copyright => feed.copyright,
              :image => feed.image,
              :generator => feed.generator,
              :ttl => feed.ttl,
              :skip_hours => feed.skip_hours,
              :skip_days => feed.skip_days)

  # create entries
  new_feed.instance_variable_set(:@output_block, block) if block
  new_feed.import_entries(feed)
  new_feed
end

.feeds_list ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/feed.rb', line 52

# Builds the list of feeds: every stored Feed ordered by title, followed by
# two VirtualFeed objects titled "Flagged Entries" and "All Entries".
#
# Returns an Array.
def self.feeds_list
  # Creates a VirtualFeed with the given title and finder parameters.
  virtual = lambda do |title, finder_params|
    vf = VirtualFeed.new
    vf.title = title
    vf.finder_params = finder_params
    vf
  end

  list = [].concat(Feed.find(:all, :order => "title asc"))
  list << virtual.call("Flagged Entries", {:conditions => "flagged is not null", :order => "flagged desc"})
  list << virtual.call("All Entries", {:order => "id desc"})
  list
end

Instance Method Details

#import_entries(feed) ⇒ Object

Takes a FeedNormalizer::Feed object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/feed.rb', line 80

# Imports the entries of a parsed feed into this feed's stored entries.
#
# feed - a FeedNormalizer::Feed object.
#
# Each entry is looked up by its id, or by its url when it has no id. An entry
# that is already stored is left alone when its last_updated value is
# unchanged and updated otherwise; an entry that is not stored is imported.
#
# Returns the number of entries that were newly imported.
def import_entries(feed)
  num_new_items = 0

  # Walk the entries in reverse because they are most recent first.
  feed.entries.reverse_each do |entry|
    # import_entry stores the id (or the url when there is no id) stripped, so
    # the lookup has to use the stripped value too; otherwise a padded id
    # never matches and the entry is imported again on every run.
    lookup_key = entry.id ? entry.id : entry.url
    lookup_key = lookup_key.strip if lookup_key.respond_to?(:strip)

    # Without any key there is nothing to look up.
    existing_entry = lookup_key &&
      self.entries.find(:first, :conditions => ["entry_id = ?", lookup_key])

    if existing_entry
      # Do nothing if the entry has not been updated.
      next if existing_entry.last_updated == entry.last_updated

      # The entry has been updated, so update it.
      puts "Updating #{entry.title}"
      update_entry(existing_entry, entry)
    else
      puts "Importing #{entry.title}"
      # import_entry returns nil for an entry it skips (no id and no url);
      # such an entry must not be counted as new.
      num_new_items += 1 if import_entry(entry)
    end
  end

  num_new_items
end

#import_entry(entry) ⇒ Object

Takes a FeedNormalizer::Entry object



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/feed.rb', line 114

# Stores one feed entry as a new record in this feed's entries.
#
# entry - a FeedNormalizer::Entry object.
#
# An entry that has neither an id nor a url is reported and skipped, and nil
# is returned. Otherwise the result of entries.create is returned.
def import_entry(entry)
  if !entry.id && !entry.url
    puts "Skipping #{entry.title}. Bad item. No entry id or url detected."
    return
  end

  # If the entry id is missing, fall back to the entry url (this happens for
  # some reason on Slashdot and maybe other websites).
  identifier = (entry.id || entry.url).strip

  # Apparently entry.last_updated is a Time object.
  updated_stamp = entry.last_updated ? entry.last_updated.to_datetime : nil

  attributes = {
    :title => entry.title,
    :description => entry.description,
    :content => entry.content,
    :categories => entry.categories,
    :date_published => entry.date_published || entry.last_updated,
    :url => entry.url,
    :urls => entry.urls,
    :entry_id => identifier,
    :authors => entry.authors,
    :copyright => entry.copyright,
    :last_updated => updated_stamp
  }
  self.entries.create(attributes)
end

#previously_updated_at ⇒ Object

This field is used to determine whether an entry in the feed is new, in which case it is colored in a special way



156
157
158
159
160
161
# File 'lib/feed.rb', line 156

# Returns the 'previously_updated_at' attribute, falling back to the
# 'created_at' attribute when the former is not set.
def previously_updated_at
  self['previously_updated_at'] || self['created_at']
end

#puts(string) ⇒ Object



71
72
73
74
75
76
77
# File 'lib/feed.rb', line 71

# Sends a message to the output block stored in @output_block when there is
# one; otherwise writes it to STDOUT.
#
# string - the message to output.
def puts(string)
  return @output_block.call(string) if @output_block

  STDOUT.puts(string)
end

#too_soon_to_update? ⇒ Boolean

(These notes appear to describe #update_self.) Takes a new version of the feed XML. Can’t call this “update” because that’s an important ActiveRecord method. The block is the output method. If no block is given, a standard block is created that just outputs to stdout. puts calls the output lambda when it’s available; otherwise it prints to STDOUT.

Returns:

  • (Boolean)


170
171
172
# File 'lib/feed.rb', line 170

# Tells whether updated_at lies within the last hour (3600 seconds).
#
# Returns true when it does, false otherwise.
def too_soon_to_update?
  last_touched = self.updated_at.to_time
  last_touched > Time.now - 3600
end

#update_entry(old, new) ⇒ Object

The old entry is ActiveRecord. The new one is a FeedNormalizer::Entry



138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'lib/feed.rb', line 138

# Overwrites a stored entry with the values of a freshly parsed one.
#
# old - the stored entry (ActiveRecord).
# new - the parsed entry (FeedNormalizer::Entry).
#
# The stored entry_id is left untouched.
#
# Returns the result of old.update_attributes.
def update_entry(old, new)
  old.update_attributes(:title => new.title,
                        :description => new.description,
                        :content => new.content,
                        :categories => new.categories,
                        # Same fallback as import_entry, so that updating an
                        # entry without a publication date does not replace
                        # the stored date with nil.
                        :date_published => new.date_published || new.last_updated,
                        :url => new.url,
                        :urls => new.urls,
                        :authors => new.authors,
                        :copyright => new.copyright,
                        # Apparently new.last_updated is a Time object
                        :last_updated => new.last_updated ? new.last_updated.to_datetime : nil)
end

#update_self(xml, force = false, &block) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/feed.rb', line 174

# Refreshes this feed from a new version of its XML.
#
# xml   - the feed document to import.
# force - when false (the default), nothing is done if too_soon_to_update?
#         reports that the feed was updated within the last hour.
# block - optional output block; when given it is stored in @output_block,
#         which puts uses for its messages.
#
# Returns the number of newly imported entries, or nil when the update was
# skipped or the XML did not parse into a FeedNormalizer::Feed. Any error
# raised while parsing, importing or saving is reported and re-raised.
def update_self(xml, force=false, &block)
  @output_block = block if block_given?

  # To be courteous, don't update feeds that have been downloaded in the last
  # hour, unless the caller insists.
  if !force && too_soon_to_update?
    puts "-> skipping. last update was with the last hour."
    return
  end

  # :updated_at is used for this program's internal bookkeeping, and tracks
  # when the feed was last accessed. :last_updated is the property of the feed.
  added = 0
  begin
    # The SimpleRssParser is forced because the other one led to errors with
    # DaringFireball's Atom feed.
    parsed = FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)

    unless parsed.is_a?(FeedNormalizer::Feed)
      puts "Failed to update #{self.title}. Try again later."
      LOGGER.debug("FAILED TO UPDATE #{self.title}")
      LOGGER.debug(xml)
      return
    end

    # From here on the feed is definitely being updated. import_entries is
    # expected to silently skip entries that already exist.
    added += import_entries(parsed)

    # Record the time of this update and persist it.
    self.last_updated = Time.now
    self.save

    puts format("-> %s new items found.", added)
  rescue
    puts "-> There was an error updating the feed #{self.feed_url}."
    raise
  end

  added
end