Class: FeedParser::LooseFeedParser

Inherits:

Object
HTML::SGMLParser
BetterSGMLParser
FeedParser::LooseFeedParser

Includes:: FeedParserMixin

Defined in:: lib/rfeedparser/parsers.rb

Constant Summary collapse

Elements_No_End_Tag =

['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']

New_Declname_Re =

/[a-zA-Z][-_.a-zA-Z0-9:]*\s*/

Constants inherited from BetterSGMLParser

BetterSGMLParser::Attrfind, BetterSGMLParser::Charref, BetterSGMLParser::Commentclose, BetterSGMLParser::Commentopen, BetterSGMLParser::Declopen, BetterSGMLParser::Endbracket, BetterSGMLParser::Endtagfind, BetterSGMLParser::Endtagopen, BetterSGMLParser::Entityref, BetterSGMLParser::Incomplete, BetterSGMLParser::Interesting, BetterSGMLParser::Piclose, BetterSGMLParser::Piopenbegin, BetterSGMLParser::Shorttag, BetterSGMLParser::Shorttagopen, BetterSGMLParser::Tagfind

Instance Attribute Summary collapse

#bozo ⇒ Object

We write the methods that were in BaseHTMLProcessor in the python code in here directly.
#encoding ⇒ Object

We write the methods that were in BaseHTMLProcessor in the python code in here directly.
#entries ⇒ Object

We write the methods that were in BaseHTMLProcessor in the python code in here directly.
#feeddata ⇒ Object

We write the methods that were in BaseHTMLProcessor in the python code in here directly.
#namespacesInUse ⇒ Object

We write the methods that were in BaseHTMLProcessor in the python code in here directly.

Instance Method Summary collapse

#decodeEntities(element, data) ⇒ Object
#feed ⇒ Object
#feed=(data) ⇒ Object
#initialize(baseuri, baselang, encoding) ⇒ LooseFeedParser constructor

A new instance of LooseFeedParser.
#parse(data) ⇒ Object
#reset ⇒ Object
#sgml_feed ⇒ Object

feed needs to be mapped to feeddata, not the SGMLParser method feed.

Methods included from FeedParserMixin

#_parse_date_greek, #_parse_date_hungarian, #_parse_date_iso8601, #_parse_date_mssql, #_parse_date_nate, #_parse_date_onblog, #_parse_date_perforce, #_parse_date_rfc822, #_parse_date_w3dtf, #extract_tuple, #parse_date, #rollover, #set_self

Methods inherited from BetterSGMLParser

#error, #goahead, #handle_decl, #handle_pi, #output, #parse_comment, #parse_endtag, #parse_pi, #parse_starttag

Constructor Details

#initialize(baseuri, baselang, encoding) ⇒ `LooseFeedParser`

Returns a new instance of LooseFeedParser.

# File 'lib/rfeedparser/parsers.rb', line 125

# Builds a new LooseFeedParser.
#
# Passes baseuri, baselang and encoding to +startup+ (not defined in this
# block; presumably supplied by FeedParserMixin -- confirm), and only then
# invokes the superclass initializer.
def initialize(baseuri, baselang, encoding)
  startup(baseuri, baselang, encoding)
  # The empty parentheses are required: a bare `super` would forward all three
  # of this method's arguments, whereas `super()` calls the parent with none.
  super() # Keep the parentheses! No touchy.
end

Instance Attribute Details

#bozo ⇒ `Object`

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we instead inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114

# File 'lib/rfeedparser/parsers.rb', line 112

# Reader: returns the current value of the @bozo instance variable.
def bozo
  @bozo
end

#encoding ⇒ `Object`



112
113
114

# File 'lib/rfeedparser/parsers.rb', line 112

# Reader: returns the current value of the @encoding instance variable.
def encoding
  @encoding
end

#entries ⇒ `Object`



112
113
114

# File 'lib/rfeedparser/parsers.rb', line 112

# Reader: returns the current value of the @entries instance variable.
def entries
  @entries
end

#feeddata ⇒ `Object`



112
113
114

# File 'lib/rfeedparser/parsers.rb', line 112

# Reader: returns the current value of the @feeddata instance variable.
def feeddata
  @feeddata
end

#namespacesInUse ⇒ `Object`



112
113
114

# File 'lib/rfeedparser/parsers.rb', line 112

# Reader: returns the current value of the @namespacesInUse instance variable.
def namespacesInUse
  @namespacesInUse
end

Instance Method Details

#decodeEntities(element, data) ⇒ `Object`

# File 'lib/rfeedparser/parsers.rb', line 156

# Normalizes the character references for the five XML-special characters
# (<, >, &, " and ') found in +data+.
#
# First, decimal and hexadecimal numeric references for those characters are
# rewritten to the matching named entity (&lt;, &gt;, &amp;, &quot;, &apos;).
# Both the lowercase and the uppercase spelling of the hex digits are handled,
# so "&#x3C;" is treated exactly like "&#x3c;".
#
# Then, only when @contentparams has a 'type' entry whose value does not end
# in "xml", the named entities are decoded to the literal characters.
#
# element:: not used by this implementation.
# data::    the String to process; it is modified in place (gsub!) and is
#           also the return value.
def decodeEntities(element, data)
  # Plain String patterns (not Regexps) are used on purpose, mirroring the
  # original statement-by-statement replacements.
  [
    ['&#60;',  '&lt;'],
    ['&#x3c;', '&lt;'],
    ['&#x3C;', '&lt;'],
    ['&#62;',  '&gt;'],
    ['&#x3e;', '&gt;'],
    ['&#x3E;', '&gt;'],
    ['&#38;',  '&amp;'],
    ['&#x26;', '&amp;'],
    ['&#34;',  '&quot;'],
    ['&#x22;', '&quot;'],
    ['&#39;',  '&apos;'],
    ['&#x27;', '&apos;']
  ].each { |reference, entity| data.gsub!(reference, entity) }

  # A missing or nil 'type' is treated as XML, so nothing is decoded.
  if @contentparams.has_key?('type') && !((@contentparams['type'] || 'xml') =~ /xml$/u)
    # Order matters: '&amp;' is decoded after '&lt;'/'&gt;' but before
    # '&quot;'/'&apos;', exactly as in the sequential replacements this
    # table stands for.
    [
      ['&lt;',   '<'],
      ['&gt;',   '>'],
      ['&amp;',  '&'],
      ['&quot;', '"'],
      ['&apos;', "'"]
    ].each { |entity, char| data.gsub!(entity, char) }
  end
  data
end

#feed ⇒ `Object`



117
118
119

# File 'lib/rfeedparser/parsers.rb', line 117

# Returns @feeddata. This definition takes no arguments and only reads the
# instance variable; it does not hand any data to the SGML parser.
# NOTE(review): the inherited SGMLParser#feed appears to be kept reachable
# under the name sgml_feed via an alias made before this definition -- confirm.
def feed       
  @feeddata
end

#feed=(data) ⇒ `Object`



121
122
123

# File 'lib/rfeedparser/parsers.rb', line 121

# Writer counterpart of #feed: stores +data+ in the @feeddata instance variable.
def feed=(data)
  @feeddata = data
end

#parse(data) ⇒ `Object`

# File 'lib/rfeedparser/parsers.rb', line 135

# Pre-processes raw feed markup and passes it to the SGML parser.
#
# Steps, in order:
# 1. The "<" of every "<!" that does not start a DOCTYPE (any letter case),
#    a comment ("<!--") or a "<![" section is escaped to "&lt;".
# 2. Attribute-less self-closing tags ("<foo/>", "<foo />") are expanded to
#    "<foo></foo>", unless the tag name is listed in Elements_No_End_Tag, in
#    which case the tag is left untouched.
# 3. "&#39;" is replaced with a literal apostrophe and "&#34;" with a literal
#    double quote.
# 4. When @encoding is set and non-empty, the text is passed through
#    uconvert(data, 'utf-8', @encoding).
# 5. The result is passed to sgml_feed.
#
# data:: the String to parse. Steps 1-3 modify it in place (gsub!).
#
# Returns whatever sgml_feed returns.
def parse(data)
  data.gsub!(/<!((?!DOCTYPE|--|\[))/i, '&lt;!\1')

  data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
    # Drop the leading "<" and the trailing "/>", plus any space before "/>".
    name = tag[1..-3].strip
    if Elements_No_End_Tag.include?(name)
      tag
    else
      "<#{name}></#{name}>"
    end
  end

  data.gsub!('&#39;', "'")
  # &#34; is the double-quote character (U+0022); it used to be turned into
  # an apostrophe here.
  data.gsub!('&#34;', '"')

  if @encoding && !@encoding.empty? # FIXME unicode check type(u'')
    data = uconvert(data, 'utf-8', @encoding)
  end
  sgml_feed(data) # sgml_feed is the aliased SGMLParser feed method
end

#reset ⇒ `Object`

# File 'lib/rfeedparser/parsers.rb', line 130

# Sets @pieces to a fresh empty array and then invokes the superclass
# implementation of reset (bare `super`; this method takes no arguments, so
# none are forwarded).
def reset
  @pieces = []
  super
end

#sgml_feed ⇒ `Object`

feed needs to be mapped to feeddata, not the SGMLParser method feed. I think.

116	# File 'lib/rfeedparser/parsers.rb', line 116 alias :sgml_feed :feed

Class: FeedParser::LooseFeedParser

Constant Summary collapse

Constants inherited from BetterSGMLParser

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from FeedParserMixin

Methods inherited from BetterSGMLParser

Constructor Details

#initialize(baseuri, baselang, encoding) ⇒ LooseFeedParser

Instance Attribute Details

#bozo ⇒ Object

#encoding ⇒ Object

#entries ⇒ Object

#feeddata ⇒ Object

#namespacesInUse ⇒ Object

Instance Method Details

#decodeEntities(element, data) ⇒ Object

#feed ⇒ Object

#feed=(data) ⇒ Object

#parse(data) ⇒ Object

#reset ⇒ Object

#sgml_feed ⇒ Object