Class: FeedNinja
- Inherits:
-
Object
- Object
- FeedNinja
- Defined in:
- lib/feed_ninja/feed_ninja.rb
Instance Attribute Summary collapse
-
#limit ⇒ Object
Returns the value of attribute limit.
-
#picture_xpath ⇒ Object
Returns the value of attribute picture_xpath.
-
#text_xpath ⇒ Object
Returns the value of attribute text_xpath.
-
#title_regex ⇒ Object
Returns the value of attribute title_regex.
-
#uri ⇒ Object
Returns the value of attribute uri.
Instance Method Summary collapse
-
#fetch(url) ⇒ Object
Gets the feed and iterates over its entries.
-
#initialize ⇒ FeedNinja
constructor
A new instance of FeedNinja.
- #initialize_writer(doc) ⇒ Object
-
#picture_at(*xpath) ⇒ Object
DSL convenience setters.
- #process_item(original, feed_type, index) ⇒ Object
- #process_items(doc) ⇒ Object
- #text_at(*xpath) ⇒ Object
- #title_matches(regex) ⇒ Object
- #to_s ⇒ Object
Constructor Details
#initialize ⇒ FeedNinja
Returns a new instance of FeedNinja.
13 14 15 16 17 |
# File 'lib/feed_ninja/feed_ninja.rb', line 13

# Sets up a fresh ninja with its defaults: the "N! " prefix that
# #initialize_writer puts in front of the feed title, a new
# AtomIshWriter stored in @writer, and an entry limit of 4.
#
# @return [FeedNinja] a new instance of FeedNinja
def initialize
  @ninja_prefix = "N! "
  @writer = AtomIshWriter.new
  @limit = 4
end
Instance Attribute Details
#limit ⇒ Object
Returns the value of attribute limit.
11 12 13 |
# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for the +limit+ attribute.
#
# @return [Object] the current value of @limit
def limit
  @limit
end
#picture_xpath ⇒ Object
Returns the value of attribute picture_xpath.
11 12 13 |
# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for the +picture_xpath+ attribute.
#
# @return [Object] the current value of @picture_xpath
def picture_xpath
  @picture_xpath
end
#text_xpath ⇒ Object
Returns the value of attribute text_xpath.
11 12 13 |
# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for the +text_xpath+ attribute.
#
# @return [Object] the current value of @text_xpath
def text_xpath
  @text_xpath
end
#title_regex ⇒ Object
Returns the value of attribute title_regex.
11 12 13 |
# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for the +title_regex+ attribute.
#
# @return [Object] the current value of @title_regex
def title_regex
  @title_regex
end
#uri ⇒ Object
Returns the value of attribute uri.
11 12 13 |
# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for the +uri+ attribute.
#
# @return [Object] the current value of @uri
def uri
  @uri
end
Instance Method Details
#fetch(url) ⇒ Object
Gets the feed and iterates over its entries.
35 36 37 38 39 40 41 42 43 44 |
# File 'lib/feed_ninja/feed_ninja.rb', line 35

# Gets the feed at +url+, parses it, and iterates over its entries.
#
# The response body is gunzipped first when the server reports a
# Content-Encoding of exactly "gzip".
#
# @param url [String] location of the feed to download
# @return [Object] the value of the last expression in the open block,
#   i.e. whatever #process_items returns
def fetch(url)
  # URI.open rather than Kernel#open: open-uri's Kernel#open patch was
  # deprecated in Ruby 2.7 and removed in 3.0, and Kernel#open would run
  # a string starting with "|" as a shell command.
  URI.open(url) do |feed|
    source = feed
    if feed.content_encoding == ['gzip']
      source = Zlib::GzipReader.new(StringIO.new(feed.read)).read
    end
    doc = RSS::Parser.parse(source)
    initialize_writer(doc)
    process_items(doc)
  end
end
#initialize_writer(doc) ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/feed_ninja/feed_ninja.rb', line 19

# Copies feed-level metadata from the parsed feed onto @writer.
#
# The writer's +updated+ field is stamped with the current time first;
# title (with @ninja_prefix prepended) and link are then read from the
# place that matches the feed type.
#
# @param doc [Object] parsed feed responding to +feed_type+, plus
#   +title+/+link+ (atom) or +channel+ (rss)
# @raise [RuntimeError] "Invalid feed format" when +feed_type+ is
#   neither "atom" nor "rss"
def initialize_writer(doc)
  @writer.updated = DateTime.now.to_s

  kind = doc.feed_type
  if kind == "atom"
    @writer.title = @ninja_prefix + doc.title.content
    @writer.link = doc.link.href
  elsif kind == "rss"
    channel = doc.channel
    @writer.title = @ninja_prefix + channel.title
    @writer.link = channel.link
  else
    raise "Invalid feed format"
  end
end
#picture_at(*xpath) ⇒ Object
DSL convenience setters
93 94 95 |
# File 'lib/feed_ninja/feed_ninja.rb', line 93

# DSL convenience setter: stores the given XPath expressions in
# @picture_xpath, which #process_item hands to the extractor when it
# collects an entry's images.
#
# @param xpath [Array<Object>] any number of XPath expressions
# @return [Array<Object>] the stored list
def picture_at(*xpath)
  @picture_xpath = xpath
end
#process_item(original, feed_type, index) ⇒ Object
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/feed_ninja/feed_ninja.rb', line 58

# Builds the output entry at position +index+ from one source feed item.
#
# Title, link, updated and id are copied from +original+ according to
# +feed_type+, and the extractor fetches the linked page. Images and
# summary are then extracted using @picture_xpath and @text_xpath. For
# any feed type other than "atom" or "rss" nothing is copied or fetched
# before extraction runs.
#
# @param original [Object] the source feed item
# @param feed_type [String] "atom" or "rss"
# @param index [Integer] position of the entry, passed to the writer
# @return [Object] whatever @writer.new_entry returns
def process_item(original, feed_type, index)
  @writer.new_entry(index) do |entry|
    LOGGER.debug { "making new entry #{index}" }
    extractor = Extractor.new

    if feed_type == "atom"
      entry.title = original.title.content
      entry.link = original.link.href
      entry.updated = original.updated
      entry.id = original.id
      extractor.fetch(original.link.href)
    elsif feed_type == "rss"
      entry.title = original.title
      entry.link = original.link
      # Items without a publication date are stamped with the current time.
      entry.updated = if original.pubDate
                        original.pubDate.xmlschema
                      else
                        DateTime.now.to_s
                      end
      entry.id = entry.link
      extractor.fetch(original.link)
    end

    LOGGER.debug { "extracting for entry #{index} #{entry}" }
    entry.images = extractor.extract_images(entry.link, @picture_xpath)
    LOGGER.debug { "RATATAT" }
    entry.summary = extractor.extract_xml(@text_xpath)
    LOGGER.debug { "adding entry #{index} #{entry}" }

    # The block's value must be the entry itself (noted as "fishy" by the
    # original author).
    entry
  end
end
#process_items(doc) ⇒ Object
46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/feed_ninja/feed_ninja.rb', line 46

# Filters the feed's items by title (when a title regex is configured),
# then processes the first @limit of them, one thread per item, and
# waits until every thread has finished.
#
# @param doc [Object] parsed feed responding to +items+ and +feed_type+
# @return [nil]
def process_items(doc)
  feed_type = doc.feed_type
  items = doc.items

  if title_regex
    items = items.select do |item|
      title = item.title
      # Atom titles are element objects whose text lives in #content (see
      # #process_item); RSS titles are plain strings. Matching the regex
      # against the element object itself would fail.
      title = title.content if title.respond_to?(:content)
      title_regex =~ title
    end
  end

  threads = items.first(@limit).each_with_index.map do |item, index|
    Thread.new { process_item(item, feed_type, index) }
  end

  # Thread#join replaces ThreadsWait.all_waits, which was dropped from the
  # standard library in Ruby 2.7. NOTE(review): ThreadsWait did not appear
  # to propagate worker failures to the caller, so one failing entry is
  # still not allowed to abort the whole feed here - confirm this
  # best-effort behaviour is intended.
  threads.each do |thread|
    begin
      thread.join
    rescue StandardError
      next
    end
  end

  nil
end
#text_at(*xpath) ⇒ Object
97 98 99 |
# File 'lib/feed_ninja/feed_ninja.rb', line 97

# DSL convenience setter: stores the given XPath expressions in
# @text_xpath, which #process_item hands to the extractor when it
# builds an entry's summary.
#
# @param xpath [Array<Object>] any number of XPath expressions
# @return [Array<Object>] the stored list
def text_at(*xpath)
  @text_xpath = xpath
end
#title_matches(regex) ⇒ Object
101 102 103 |
# File 'lib/feed_ninja/feed_ninja.rb', line 101

# DSL convenience setter: stores the pattern that #process_items uses
# to select feed items by title.
#
# @param regex [Regexp] pattern matched against item titles
# @return [Regexp] the stored pattern
def title_matches(regex)
  @title_regex = regex
end
#to_s ⇒ Object
87 88 89 |
# File 'lib/feed_ninja/feed_ninja.rb', line 87

# String form of the ninja: delegates to the writer held in @writer.
#
# @return [String] the result of @writer.to_s
def to_s
  @writer.to_s
end