Class: Feedbase::FeedParser

Inherits:
Object
  • Object
show all
Defined in:
lib/feedbase/feed_parser.rb

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ FeedParser

The XML should already be UTF-8 encoded when it is passed to this constructor.



12
13
14
15
16
# File 'lib/feedbase/feed_parser.rb', line 12

# Builds the parser and immediately stream-parses the given XML.
#
# A fresh FeedListener receives the parse events; both the raw XML and the
# listener are kept on the instance.
#
# @param xml the feed XML to parse (callers should try to supply UTF-8)
def initialize(xml)
  listener = FeedListener.new
  @xml = xml
  @listener = listener
  # Parsing happens eagerly, at construction time.
  REXML::Document.parse_stream(xml, listener)
end

Instance Method Details

#result ⇒ Object



18
19
20
# File 'lib/feedbase/feed_parser.rb', line 18

# Returns the listener's result after passing it through #tidy.
#
# @return [Object] whatever #tidy returns for the listener's result
def result
  raw_feed = @listener.result
  tidy(raw_feed)
end

#tidy(feed) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/feedbase/feed_parser.rb', line 22

# Normalises the body of every item in the feed and returns the feed.
#
# For each item, the body is taken from :content, falling back to :summary
# and then to an empty string. It is run through HtmlSimplifier, paragraphs
# that contain only newlines or <br/> tags are removed, and the text is
# stripped and terminated with a blank line. The item's :summary key is
# removed, :content is overwritten and :word_count is set. Items and the feed
# hash are modified in place.
#
# @param feed [Hash] feed data with an :items collection of item hashes
# @return [Hash] the same feed hash, with its :items replaced
def tidy(feed)
  feed[:items] = feed[:items].map do |entry|
    raw_html = entry[:content] || entry[:summary] || ""
    simplified = HtmlSimplifier.new(raw_html, "utf-8").result
    # Drop paragraphs whose only content is line breaks.
    without_blank_paragraphs = simplified.gsub(%r{<p>(\n|<br/>)+</p>}, '')
    cleaned = without_blank_paragraphs.strip + "\n\n"

    entry.delete(:summary)
    entry[:content] = cleaned
    entry[:word_count] = word_count(cleaned)
    entry
  end
  feed
end

#word_count(string) ⇒ Object



36
37
38
# File 'lib/feedbase/feed_parser.rb', line 36

# Counts the whitespace-separated words in +string+, ignoring markup tags.
#
# @param string [String] text that may contain HTML/XML-style tags
# @return [Integer] number of words left once the tags are removed
def word_count(string)
  # %r{} builds a Regexp. A bare %{} is a String literal, which gsub would
  # match literally, so no tag would ever be stripped.
  # Argument-less split ignores leading whitespace, so text that starts with
  # whitespace (e.g. right after a stripped tag) yields no empty first token.
  string.gsub(%r{</?[^>]+>}, '').split.size
end