Class: FeedParser::Parser
- Inherits:
-
Object
- Object
- FeedParser::Parser
- Includes:
- LogUtils::Logging
- Defined in:
- lib/feedparser/parser.rb
Constant Summary collapse
- JSONFEED_VERSION_RE =
%r{"version":\s*"https://jsonfeed.org/version/1"}
Class Method Summary collapse
-
.parse(text, opts = {}) ⇒ Object
convenience class/factory method.
Instance Method Summary collapse
-
#initialize(text) ⇒ Parser
constructor
Note: let's keep/use the same API as RSS::Parser for now.
- #is_json? ⇒ Boolean (also: #json?)
- #is_microformats? ⇒ Boolean (also: #microformats?)
-
#is_xml? ⇒ Boolean
(also: #xml?)
note: make format checks callable from outside (that is, use builtin helper methods).
- #parse ⇒ Object
- #parse_json ⇒ Object
- #parse_microformats ⇒ Object
- #parse_xml ⇒ Object
Constructor Details
#initialize(text) ⇒ Parser
Note: let's keep/use the same API as RSS::Parser for now.
17 18 19 20 |
# File 'lib/feedparser/parser.rb', line 17
# Constructor. Keeps the same call shape as RSS::Parser for now.
#
# @param text [String] raw feed text to parse
def initialize( text )
  @text = text
  # note: sample only the first 101 characters, then trim surrounding
  #   whitespace; the format checks (is_xml?, is_json?) read this short head
  @head = @text[0, 101].strip
end
Class Method Details
.parse(text, opts = {}) ⇒ Object
convenience class/factory method
12 13 14 |
# File 'lib/feedparser/parser.rb', line 12
# Convenience class/factory method: builds a parser for the text and runs it.
#
# @param text [String] raw feed text
# @param opts [Hash] accepted in the signature but not read by this method
# @return [Object] whatever the instance-level #parse returns
def self.parse( text, opts = {} )
  new( text ).parse
end
Instance Method Details
#is_json? ⇒ Boolean Also known as: json?
36 37 38 39 40 41 |
# File 'lib/feedparser/parser.rb', line 36
# Tells whether the buffered head of the text looks like json.
#
# True when the head starts with { (json object/hash) or when it
# includes the jsonfeed prolog (see JSONFEED_VERSION_RE).
#
# @return [Boolean] strictly true or false (never a match offset or nil)
def is_json?
  return true if @head.start_with?( '{' )

  ## note: =~ yields an Integer offset or nil - normalize to a real boolean
  !( @head =~ JSONFEED_VERSION_RE ).nil?
end
#is_microformats? ⇒ Boolean Also known as: microformats?
44 45 46 47 48 49 |
# File 'lib/feedparser/parser.rb', line 44
# Tells whether the full text mentions a microformats v2 root class.
#
# note: for now only the v2 markers (h-entry, h-feed) are looked for;
#   v1 markers (e.g. hentry, hatom ??) are not checked - why? why not?
#
# @return [Boolean]
def is_microformats?
  %w[h-entry h-feed].any? { |marker| @text.include?( marker ) }
end
#is_xml? ⇒ Boolean Also known as: xml?
note: make format checks callable from outside (that is, use builtin helper methods)
27 28 29 30 31 32 |
# File 'lib/feedparser/parser.rb', line 27
# note: format checks are callable from outside (builtin helper methods)
#
# Tells whether the head starts with one of the known xml prologs
# (<?xml, <feed or <rss).
#
# @return [Boolean]
def is_xml?
  @head.start_with?( '<?xml', '<feed', '<rss' )
end
#parse ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/feedparser/parser.rb', line 54
# Picks a format-specific parser for the text and returns its result.
#
# Checks run in this order: xml, json, microformats; anything else
# falls back to xml.
#
# @return [Object] result of parse_xml, parse_json or parse_microformats
def parse
  return parse_xml   if is_xml?
  return parse_json  if is_json?

  ## note: reading/parsing microformats is optional for now
  ##   the microformats gem requires nokogiri
  ##   nokogiri (uses libxml c-extensions) is (sometimes) hard to install
  ##   thus, opt-in if you want to use it - keeps the install "light"
  return parse_microformats  if defined?( Microformats ) && is_microformats?

  parse_xml   ## fallback - assume xml for now
end
#parse_json ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 |
# File 'lib/feedparser/parser.rb', line 86
# Parses the text with the stdlib json parser and hands the resulting
# structure to JsonFeedBuilder.
#
# @return [Object] the feed returned by JsonFeedBuilder.build
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  data = JSON.parse( @text )

  ## log format / title, then return the new (normalized) feed
  JsonFeedBuilder.build( data ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end
#parse_microformats ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/feedparser/parser.rb', line 71
# Parses the text as html with the microformats library and hands the
# hash form of the result to HyFeedBuilder.
#
# @return [Object] the feed returned by HyFeedBuilder.build
def parse_microformats
  logger.debug "using microformats/#{Microformats::VERSION}"
  logger.debug "Parsing feed in html (w/ microformats)..."

  mf_hash = Microformats.parse( @text ).to_hash

  ## log format / title, then return the new (normalized) feed
  HyFeedBuilder.build( mf_hash ).tap do |feed|
    logger.debug "== #{feed.format} / #{feed.title} =="
  end
end
#parse_xml ⇒ Object
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 |
# File 'lib/feedparser/parser.rb', line 99
# Parses the text with the stdlib rss parser (validation off, unknown
# elements ignored) and normalizes the result with the matching builder.
#
# @return [Object] the feed returned by AtomFeedBuilder.build or
#   RssFeedBuilder.build
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  rss_parser = RSS::Parser.new( @text )
  rss_parser.do_validate            = false
  rss_parser.ignore_unknown_element = true

  logger.debug "Parsing feed in xml..."
  raw_feed = rss_parser.parse    # not yet normalized
  logger.debug " feed.class=#{raw_feed.class.name}"

  ## atom gets its own builder; anything else - assume RSS::Rss::Feed
  builder = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder
  feed    = builder.build( raw_feed, @text )

  logger.debug "== #{feed.format} / #{feed.title} =="
  feed   # return new (normalized) feed
end