Class: Feedbase::FeedParser
- Inherits:
-
Object
- Object
- Feedbase::FeedParser
- Defined in:
- lib/feedbase/feed_parser.rb
Instance Method Summary
-
#initialize(xml) ⇒ FeedParser
constructor
Try to have the XML in UTF-8 when you call this.
- #result ⇒ Object
- #tidy(feed) ⇒ Object
- #word_count(string) ⇒ Object
Constructor Details
#initialize(xml) ⇒ FeedParser
Try to have the XML in UTF-8 when you call this.
12 13 14 15 16 |
# File 'lib/feedbase/feed_parser.rb', line 12
#
# Builds a parser for the given feed XML and parses it immediately:
# a fresh FeedListener is created and REXML stream-parses the XML,
# delivering its events to that listener.
#
# Try to have the XML in UTF-8 when you call this.
#
# @param xml the feed XML handed to REXML::Document.parse_stream
def initialize(xml)
  @listener = FeedListener.new
  @xml = xml
  # Parsing happens eagerly here; #result later reads from the listener.
  REXML::Document.parse_stream(@xml, @listener)
end
Instance Method Details
#result ⇒ Object
18 19 20 |
# File 'lib/feedbase/feed_parser.rb', line 18
#
# Returns the listener's parse result after it has been passed
# through #tidy.
def result
  parsed_feed = @listener.result
  tidy(parsed_feed)
end
#tidy(feed) ⇒ Object
22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/feedbase/feed_parser.rb', line 22
#
# Cleans up every item of a parsed feed and returns the feed.
#
# For each item in feed[:items] (items are modified in place):
# * the body is taken from :content, falling back to :summary, then "";
# * the body is run through HtmlSimplifier, paragraphs that contain only
#   newlines and/or <br/> are removed, the text is stripped and
#   terminated with two newlines;
# * :summary is deleted, :content is replaced by the cleaned body and
#   :word_count is set from #word_count of that body.
#
# @param feed a hash-like object with an :items collection
# @return the same feed object, with feed[:items] reassigned
def tidy(feed)
  feed[:items] = feed[:items].map do |entry|
    raw_html = entry[:content] || entry[:summary] || ""
    simplified = HtmlSimplifier.new(raw_html, "utf-8").result
    # Drop paragraphs whose only content is line breaks.
    without_blank_paragraphs = simplified.gsub(%r{<p>(\n|<br/>)+</p>}, '')
    cleaned = without_blank_paragraphs.strip + "\n\n"

    entry.delete(:summary)
    entry[:content] = cleaned
    entry[:word_count] = word_count(cleaned)
    entry
  end
  feed
end
#word_count(string) ⇒ Object
36 37 38 |
# File 'lib/feedbase/feed_parser.rb', line 36
#
# Counts the whitespace-separated words in a string after removing
# anything that looks like an HTML/XML tag (<...> or </...>).
#
# Tags are removed without inserting a separator, so text on either
# side of a tag with no whitespace between is counted as one word.
#
# @param string [String] text, possibly containing markup
# @return [Integer] number of words outside of tags
def word_count(string)
  # %r{} is required here: a plain %{} literal is a String, and gsub
  # with a String pattern matches that text literally, so no tag would
  # ever be stripped and attributes would be counted as words.
  text = string.gsub(%r{</?[^>]+>}, '')
  # Argument-less split ignores leading whitespace, which split(/\s+/)
  # would turn into a spurious empty first "word".
  text.split.size
end