Class: FeedParser::Parser

Inherits:
Object
  • Object
show all
Includes:
LogUtils::Logging
Defined in:
lib/feedparser/parser.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xml) ⇒ Parser

Note: let's keep/use the same API as RSS::Parser for now



21
22
23
# File 'lib/feedparser/parser.rb', line 21

# Builds a parser around the given feed source.
#
# The argument is stored untouched in @xml; nothing is parsed here.
#
# @param xml the feed source to keep for a later parse call
def initialize(xml)
  @xml = xml
end

Class Method Details

.parse(xml, opts = {}) ⇒ Object

convenience class/factory method



16
17
18
# File 'lib/feedparser/parser.rb', line 16

# Convenience factory: creates a parser for +xml+ and parses it right away.
#
# @param xml  the feed source handed to the constructor
# @param opts accepted for call compatibility; not used by this method
# @return the result of calling parse on the new instance
def self.parse( xml, opts={} )
  parser = new( xml )
  parser.parse
end

Instance Method Details

#parse ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/feedparser/parser.rb', line 27

# Sniffs the beginning of the stored source to decide whether it is an
# XML feed or a JSON feed, then hands off to parse_xml or parse_json.
#
# Only the first 101 characters that follow any leading whitespace are
# inspected. Anything that is not recognised as JSON is treated as XML.
#
# @return whatever parse_xml or parse_json returns
def parse
  ## note: remove leading whitespace BEFORE slicing, so that a long run of
  ##   leading blanks cannot push the prolog out of the inspected window
  head = @xml.lstrip[0..100]

  jsonfeed_version_regex = %r{"version":\s*"https://jsonfeed\.org/version/1"}

  ## check if starts with known xml prologs / root elements
  ##   note: match the tag name only (no trailing slash), so that
  ##   <feed xmlns="..."> and <rss version="..."> are recognised too
  if head.start_with?( '<?xml', '<feed', '<rss' )
    parse_xml
  ## check if starts with { for json object/hash
  ##    or if includes jsonfeed prolog
  elsif head.start_with?( '{' ) ||
        head =~ jsonfeed_version_regex
    parse_json
  else  ## assume xml for now
    parse_xml
  end
end

#parse_json ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
# File 'lib/feedparser/parser.rb', line 48

# Parses the stored source with the stdlib JSON parser and turns the
# resulting hash into a normalized feed via JsonFeedBuilder.
#
# Errors raised by JSON.parse are not rescued here.
#
# @return the feed returned by JsonFeedBuilder.build
def parse_json
  logger.debug "using stdlib json/#{JSON::VERSION}"
  logger.debug "Parsing feed in json..."

  # tap hands back the built feed itself after the summary line is logged
  JsonFeedBuilder.build( JSON.parse( @xml ) ).tap do |normalized|
    logger.debug "== #{normalized.format} / #{normalized.title} =="
  end
end

#parse_xml ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/feedparser/parser.rb', line 61

# Parses the stored source with the stdlib RSS parser (validation off,
# unknown elements ignored) and turns the result into a normalized feed.
#
# An RSS::Atom::Feed result goes through AtomFeedBuilder; every other
# result is assumed to be an RSS feed and goes through RssFeedBuilder.
#
# @return the feed returned by the selected builder
def parse_xml
  logger.debug "using stdlib rss/#{RSS::VERSION}"

  rss_parser = RSS::Parser.new( @xml ).tap do |rss|
    rss.do_validate            = false
    rss.ignore_unknown_element = true
  end

  logger.debug "Parsing feed in xml..."
  raw = rss_parser.parse  # not yet normalized

  logger.debug "  feed.class=#{raw.class.name}"

  # anything that is not an atom feed is assumed to be RSS::Rss::Feed
  builder = raw.is_a?( RSS::Atom::Feed ) ? AtomFeedBuilder : RssFeedBuilder
  normalized = builder.build( raw )

  logger.debug "== #{normalized.format} / #{normalized.title} =="
  normalized # return new (normalized) feed
end