Class: FeedParser::Parser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedparser/parser.rb

Constant Summary collapse

JSONFEED_VERSION_RE =
%r{"version":\s*"https://jsonfeed.org/version/1"}

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text) ⇒ Parser

Note: let's keep/use the same API as RSS::Parser for now



17
18
19
20
# File 'lib/feedparser/parser.rb', line 17

# Stores the raw feed text and precomputes a short "head" excerpt from its
# beginning.
#
# Note: keeps the same constructor shape as RSS::Parser for now.
#
# @param text [String] raw feed document
def initialize( text )
  @text = text
  ## keep only the first 101 characters, then strip surrounding whitespace
  ##   (so leading spaces/newlines do not get in the way of prefix checks)
  excerpt = @text.slice( 0..100 )
  @head   = excerpt.strip
end

Class Method Details

.parse(text, opts = {}) ⇒ Object

convenience class/factory method



12
13
14
# File 'lib/feedparser/parser.rb', line 12

# Convenience class/factory method: builds a parser for +text+ and
# immediately runs the instance-level #parse on it.
#
# @param text [String] raw feed document
# @param opts [Hash] accepted but currently unused by this method
# @return [Object] whatever the instance-level #parse returns
def self.parse( text, opts={} )
  parser = new( text )
  parser.parse
end

Instance Method Details

#is_json? ⇒ Boolean Also known as: json?

Returns:

  • (Boolean)


36
37
38
39
40
41
# File 'lib/feedparser/parser.rb', line 36

# Checks whether the head excerpt (@head) looks like a JSON document.
#
# @return [Boolean] strictly true or false
def is_json?
  ## check if starts with { for json object/hash
  ##    or if includes jsonfeed prolog
  ##  note: uses match? (not =~) so the result is a real boolean
  ##        instead of a match index (Integer) or nil
  @head.start_with?( '{' ) ||
  JSONFEED_VERSION_RE.match?( @head )
end

#is_microformats? ⇒ Boolean Also known as: microformats?

Returns:

  • (Boolean)


44
45
46
47
48
49
# File 'lib/feedparser/parser.rb', line 44

# Checks whether the full text mentions a microformats v2 root class name.
#
# @return [Boolean] true if @text contains 'h-entry' or 'h-feed'
def is_microformats?
  ## only microformats v2 markers are checked for now (h-entry, h-feed);
  ##   v1 markers (e.g. hentry, hatom) are not looked at
  %w[h-entry h-feed].any? { |marker| @text.include?( marker ) }
end

#is_xml? ⇒ Boolean Also known as: xml?

note: make format checks callable from outside (that is, use builtin helper methods)

Returns:

  • (Boolean)


27
28
29
30
31
32
# File 'lib/feedparser/parser.rb', line 27

# Checks whether the head excerpt (@head) starts with a known xml prolog.
#
# @return [Boolean] true if @head begins with '<?xml', '<feed' or '<rss'
def is_xml?
  ## start_with? accepts several prefixes and is true if any of them matches
  @head.start_with?( '<?xml', '<feed', '<rss' )
end

#parse ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/feedparser/parser.rb', line 54

# Picks a format-specific parse method based on the format checks and
# returns its result.
#
# Checks run in order: xml, json, then (optionally) microformats; anything
# else falls back to xml.
#
# @return [Object] the result of the selected parse_* method
def parse
  return parse_xml  if is_xml?
  return parse_json if is_json?

  ##  note: reading/parsing microformats is optional for now
  ##    the microformats gem requires nokogiri (libxml c-extensions),
  ##    which can make installs harder; so it is only used if the
  ##    Microformats constant is already defined (that is, opt-in)
  return parse_microformats if defined?( Microformats ) && is_microformats?

  parse_xml   ## fallback - assume xml for now
end

#parse_json ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/feedparser/parser.rb', line 86

# Parses @text as JSON and hands the resulting structure to
# JsonFeedBuilder, logging progress along the way.
#
# @return [Object] the feed returned by JsonFeedBuilder.build
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  ## parse -> build -> log summary; tap hands back the built feed itself
  JsonFeedBuilder.build( JSON.parse( @text ) ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end

#parse_microformats ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/feedparser/parser.rb', line 71

# Parses @text with Microformats, converts the result to a hash and hands
# it to HyFeedBuilder, logging progress along the way.
#
# @return [Object] the feed returned by HyFeedBuilder.build
def parse_microformats
  logger.debug "using microformats/#{Microformats::VERSION}"
  logger.debug "Parsing feed in html (w/ microformats)..."

  parsed_hash = Microformats.parse( @text ).to_hash

  ## tap hands back the built feed itself after logging the summary
  HyFeedBuilder.build( parsed_hash ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end

#parse_xml ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/feedparser/parser.rb', line 99

# Parses @text with RSS::Parser and normalizes the result through
# AtomFeedBuilder (for RSS::Atom::Feed) or RssFeedBuilder (anything else),
# logging progress along the way.
#
# @return [Object] the feed returned by the selected builder
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  xml_parser = RSS::Parser.new( @text )
  xml_parser.do_validate            = false
  xml_parser.ignore_unknown_element = true

  logger.debug "Parsing feed in xml..."
  raw_feed = xml_parser.parse   ## not yet normalized

  logger.debug "  feed.class=#{raw_feed.class.name}"

  ## atom gets its own builder; everything else is assumed to be RSS::Rss
  builder = raw_feed.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder

  builder.build( raw_feed, @text ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end