Class: FeedNinja

Inherits:

Object

Object
FeedNinja

show all

Defined in: lib/feed_ninja/feed_ninja.rb

Instance Attribute Summary collapse

#limit ⇒ Object

Returns the value of attribute limit.
#picture_xpath ⇒ Object

Returns the value of attribute picture_xpath.
#text_xpath ⇒ Object

Returns the value of attribute text_xpath.
#title_regex ⇒ Object

Returns the value of attribute title_regex.
#uri ⇒ Object

Returns the value of attribute uri.

Instance Method Summary collapse

#fetch(url) ⇒ Object

Gets the feed and iterates over its entries.
#initialize ⇒ FeedNinja constructor

A new instance of FeedNinja.
#initialize_writer(doc) ⇒ Object
#picture_at(*xpath) ⇒ Object

DSL convenience setters.
#process_item(original, feed_type, index) ⇒ Object
#process_items(doc) ⇒ Object
#text_at(*xpath) ⇒ Object
#title_matches(regex) ⇒ Object
#to_s ⇒ Object

Constructor Details

#initialize ⇒ `FeedNinja`

Returns a new instance of FeedNinja.

# File 'lib/feed_ninja/feed_ninja.rb', line 13

# Sets up a FeedNinja with its default state.
#
# Defaults: a fresh AtomIshWriter as the output writer, the "N! " marker
# that initialize_writer prepends to the feed title, and a limit of 4, which
# process_items uses as the number of items to take from the feed.
def initialize
  @writer = AtomIshWriter.new
  @ninja_prefix = "N! "
  @limit = 4
end

Instance Attribute Details

#limit ⇒ `Object`

Returns the value of attribute limit.



11
12
13

# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for @limit, the number of feed items that process_items takes via
# items.first(@limit). The constructor sets it to 4.
#
# @return [Object] the current value of @limit
def limit
  @limit
end

#picture_xpath ⇒ `Object`

Returns the value of attribute picture_xpath.



11
12
13

# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for @picture_xpath, the array stored by picture_at and passed to
# Extractor#extract_images in process_item.
#
# @return [Object] the current value of @picture_xpath (nil if never assigned)
def picture_xpath
  @picture_xpath
end

#text_xpath ⇒ `Object`

Returns the value of attribute text_xpath.



11
12
13

# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for @text_xpath, the array stored by text_at and passed to
# Extractor#extract_xml in process_item.
#
# @return [Object] the current value of @text_xpath (nil if never assigned)
def text_xpath
  @text_xpath
end

#title_regex ⇒ `Object`

Returns the value of attribute title_regex.



11
12
13

# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for @title_regex, the pattern stored by title_matches. When it is
# set, process_items keeps only the items whose title matches it.
#
# @return [Object] the current value of @title_regex (nil if never assigned)
def title_regex
  @title_regex
end

#uri ⇒ `Object`

Returns the value of attribute uri.



11
12
13

# File 'lib/feed_ninja/feed_ninja.rb', line 11

# Reader for @uri.
#
# NOTE(review): none of the methods shown in this listing assign @uri, so
# where it gets its value (an attribute writer, presumably) is not visible
# here -- confirm whether it is still used.
#
# @return [Object] the current value of @uri (nil if never assigned)
def uri
  @uri
end

Instance Method Details

#fetch(url) ⇒ `Object`

Gets the feed and iterates over its entries.

# File 'lib/feed_ninja/feed_ninja.rb', line 35

# Downloads the feed at +url+, parses it and runs it through the writer.
#
# The response body is gunzipped first when the server reports a gzip
# Content-Encoding. The parsed document is then handed to initialize_writer
# (feed-level title and link) and to process_items (the individual entries).
#
# @param url [String] address of the RSS or Atom feed to download
# @return [Object] the value returned by process_items
def fetch(url)
  # URI.open rather than Kernel#open: Kernel#open no longer handles URLs as of
  # Ruby 3.0, and it runs a shell command when the argument starts with "|".
  URI.open(url) do |response|
    body = response
    if response.content_encoding == ['gzip']
      body = Zlib::GzipReader.new(StringIO.new(response.read)).read
    end
    doc = RSS::Parser.parse(body)
    initialize_writer(doc)
    process_items(doc)
  end
end

#initialize_writer(doc) ⇒ `Object`

# File 'lib/feed_ninja/feed_ninja.rb', line 19

# Copies the feed-level metadata of a parsed feed into the writer.
#
# The writer's update timestamp is always set to the current time. Title
# (prefixed with @ninja_prefix) and link are read from wherever the feed
# format keeps them.
#
# @param doc [Object] parsed feed; must respond to feed_type
# @raise [RuntimeError] when feed_type is neither "atom" nor "rss"
def initialize_writer(doc)
  @writer.updated = DateTime.now.to_s

  kind = doc.feed_type
  if kind == "atom"
    # Atom keeps title and link as elements on the feed itself.
    @writer.title = @ninja_prefix + doc.title.content
    @writer.link = doc.link.href
  elsif kind == "rss"
    # RSS keeps them as plain values on the channel.
    @writer.title = @ninja_prefix + doc.channel.title
    @writer.link = doc.channel.link
  else
    raise "Invalid feed format"
  end
end

#picture_at(*xpath) ⇒ `Object`

DSL convenience setters



93
94
95

# File 'lib/feed_ninja/feed_ninja.rb', line 93

# DSL setter: stores the expression(s) later passed to
# Extractor#extract_images when an entry's pictures are looked up.
#
# @param xpath [Array] any number of expressions, collected into one array
# @return [Array] the array that was stored in @picture_xpath
def picture_at(*xpath)
  @picture_xpath = xpath
end

#process_item(original, feed_type, index) ⇒ `Object`

# File 'lib/feed_ninja/feed_ninja.rb', line 58

# Turns one feed item into an entry of the output feed.
#
# The basic fields (title, link, updated, id) are copied from the item in
# the way its feed format requires, the linked page is loaded through an
# Extractor, and the entry's images and summary are taken from that page.
#
# @param original [Object] the item as parsed from the source feed
# @param feed_type [String] "atom" or "rss"; for any other value no fields
#   are copied and no page is fetched before extraction
# @param index [Integer] position of the entry, passed on to the writer
# @return [Object] whatever @writer.new_entry returns
def process_item(original, feed_type, index)
  @writer.new_entry(index) do |entry|
    LOGGER.debug { "making new entry #{index}" }
    page = Extractor.new

    case feed_type
    when "atom"
      entry.title = original.title.content
      entry.link = original.link.href
      entry.updated = original.updated
      entry.id = original.id
      page.fetch(original.link.href)
    when "rss"
      entry.title = original.title
      entry.link = original.link
      # Items without a publication date are stamped with the current time.
      entry.updated =
        if original.pubDate
          original.pubDate.xmlschema
        else
          DateTime.now.to_s
        end
      # The link doubles as the entry id for RSS items.
      entry.id = entry.link
      page.fetch(original.link)
    end

    LOGGER.debug { "extracting for entry #{index} #{entry}" }
    entry.images = page.extract_images(entry.link, @picture_xpath)
    LOGGER.debug { "RATATAT" }
    entry.summary = page.extract_xml(@text_xpath)

    LOGGER.debug { "adding entry #{index} #{entry}" }
    # The block's value has to be the entry itself (the original author
    # flagged this explicit return as "kind of fishy").
    entry
  end
end

#process_items(doc) ⇒ `Object`

# File 'lib/feed_ninja/feed_ninja.rb', line 46

# Runs process_item concurrently, one thread per item, for the first @limit
# items of the feed, and waits until every thread has finished.
#
# When a title filter is configured (see title_matches) only the items whose
# title matches it are considered; the limit is applied after filtering.
# A worker that fails does not abort the others: its error is logged and the
# remaining entries are still produced.
#
# @param doc [Object] parsed feed responding to items and feed_type
# @return [nil]
def process_items(doc)
  feed_type = doc.feed_type
  items = doc.items

  if title_regex
    items = items.select do |item|
      # Atom titles are element objects whose text is in #content (as
      # process_item reads them); RSS titles are plain strings.
      title = feed_type == "atom" ? item.title.content : item.title
      title_regex =~ title
    end
  end

  workers = items.first(@limit).each_with_index.map do |item, index|
    Thread.new { process_item(item, feed_type, index) }
  end

  # Core Thread#join instead of ThreadsWait: the thwait library is no longer
  # shipped with Ruby 3.0+. join re-raises a worker's exception, so it is
  # rescued here to keep the old "wait for all, never raise" behaviour.
  workers.each do |worker|
    begin
      worker.join
    rescue StandardError => e
      LOGGER.debug { "entry worker failed: #{e.message}" }
    end
  end

  nil
end

#text_at(*xpath) ⇒ `Object`



97
98
99

# File 'lib/feed_ninja/feed_ninja.rb', line 97

# DSL setter: stores the expression(s) later passed to
# Extractor#extract_xml when an entry's summary is extracted.
#
# @param xpath [Array] any number of expressions, collected into one array
# @return [Array] the array that was stored in @text_xpath
def text_at(*xpath)
  @text_xpath = xpath
end

#title_matches(regex) ⇒ `Object`



101
102
103

# File 'lib/feed_ninja/feed_ninja.rb', line 101

# DSL setter: stores the pattern that process_items uses to keep only the
# feed items whose title matches.
#
# @param regex [Regexp] pattern an item's title has to match
# @return [Regexp] the pattern that was stored in @title_regex
def title_matches(regex)
  @title_regex = regex
end

#to_s ⇒ `Object`



87
88
89

# File 'lib/feed_ninja/feed_ninja.rb', line 87

# Renders this object by delegating to the writer's own to_s.
#
# @return [Object] whatever @writer.to_s returns -- presumably the generated
#   feed as a String; confirm against AtomIshWriter
def to_s
  @writer.to_s
end

Class: FeedNinja

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ FeedNinja

Instance Attribute Details

#limit ⇒ Object

#picture_xpath ⇒ Object

#text_xpath ⇒ Object

#title_regex ⇒ Object

#uri ⇒ Object

Instance Method Details

#fetch(url) ⇒ Object

#initialize_writer(doc) ⇒ Object

#picture_at(*xpath) ⇒ Object

#process_item(original, feed_type, index) ⇒ Object

#process_items(doc) ⇒ Object

#text_at(*xpath) ⇒ Object

#title_matches(regex) ⇒ Object

#to_s ⇒ Object