Class: FeedParser::LooseFeedParser

Inherits:
BetterSGMLParser show all
Includes:
FeedParserMixin
Defined in:
lib/rfeedparser/parsers.rb

Constant Summary collapse

Elements_No_End_Tag =
['area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param']
New_Declname_Re =
/[a-zA-Z][-_.a-zA-Z0-9:]*\s*/

Constants inherited from BetterSGMLParser

BetterSGMLParser::Attrfind, BetterSGMLParser::Charref, BetterSGMLParser::Commentclose, BetterSGMLParser::Commentopen, BetterSGMLParser::Declopen, BetterSGMLParser::Endbracket, BetterSGMLParser::Endtagfind, BetterSGMLParser::Endtagopen, BetterSGMLParser::Entityref, BetterSGMLParser::Incomplete, BetterSGMLParser::Interesting, BetterSGMLParser::Piclose, BetterSGMLParser::Piopenbegin, BetterSGMLParser::Shorttag, BetterSGMLParser::Shorttagopen, BetterSGMLParser::Tagfind

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from FeedParserMixin

#_parse_date_greek, #_parse_date_hungarian, #_parse_date_iso8601, #_parse_date_mssql, #_parse_date_nate, #_parse_date_onblog, #_parse_date_perforce, #_parse_date_rfc822, #_parse_date_w3dtf, #extract_tuple, #parse_date, #rollover, #set_self

Methods inherited from BetterSGMLParser

#error, #goahead, #handle_decl, #handle_pi, #output, #parse_comment, #parse_endtag, #parse_pi, #parse_starttag

Constructor Details

#initialize(baseuri, baselang, encoding) ⇒ LooseFeedParser

Returns a new instance of LooseFeedParser.



125
126
127
128
# File 'lib/rfeedparser/parsers.rb', line 125

# Creates a LooseFeedParser: first runs +startup+ with the three arguments,
# then invokes the superclass initializer with an empty argument list.
#
# @param baseuri  forwarded unchanged to +startup+
# @param baselang forwarded unchanged to +startup+
# @param encoding forwarded unchanged to +startup+
def initialize(baseuri, baselang, encoding)
  startup(baseuri, baselang, encoding)
  super() # Keep the parentheses: a bare `super` would forward all three arguments to the parent initializer.
end

Instance Attribute Details

#bozo ⇒ Object

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114
# File 'lib/rfeedparser/parsers.rb', line 112

# Reader for the @bozo instance variable.
#
# @return [Object] whatever is currently stored in @bozo
def bozo
  @bozo
end

#encoding ⇒ Object

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114
# File 'lib/rfeedparser/parsers.rb', line 112

# Reader for the @encoding instance variable.
#
# @return [Object] whatever is currently stored in @encoding
def encoding
  @encoding
end

#entries ⇒ Object

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114
# File 'lib/rfeedparser/parsers.rb', line 112

# Reader for the @entries instance variable.
#
# @return [Object] whatever is currently stored in @entries
def entries
  @entries
end

#feeddata ⇒ Object

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114
# File 'lib/rfeedparser/parsers.rb', line 112

# Reader for the @feeddata instance variable.
#
# @return [Object] whatever is currently stored in @feeddata
def feeddata
  @feeddata
end

#namespacesInUse ⇒ Object

The methods that lived in BaseHTMLProcessor in the Python code are written directly in this class. If we inherited from BaseHTMLProcessor and then included FeedParserMixin, the mixin's methods would override the ones inherited from BaseHTMLProcessor, which is exactly the opposite of what we want.



112
113
114
# File 'lib/rfeedparser/parsers.rb', line 112

# Reader for the @namespacesInUse instance variable.
#
# @return [Object] whatever is currently stored in @namespacesInUse
def namespacesInUse
  @namespacesInUse
end

Instance Method Details

#decodeEntities(element, data) ⇒ Object



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/rfeedparser/parsers.rb', line 156

# Normalizes the character references for the five XML special characters
# in +data+ and, for non-XML content, decodes them to literal characters.
#
# Step 1: the decimal and hexadecimal numeric references for <, >, &, " and '
# are rewritten to their named forms (&lt; &gt; &amp; &quot; &apos;).
# Step 2: only when @contentparams has a 'type' key whose value does not end
# in "xml", those named entities are replaced by the characters themselves.
#
# NOTE: +data+ is modified in place (gsub!) and that same object is returned.
#
# @param element [Object] not used by this method
# @param data [String] mutable text to normalize
# @return [String] +data+, after the in-place substitutions
def decodeEntities(element, data)
  # Numeric character reference => named entity.
  [
    ['&#60;', '&lt;'],   ['&#x3c;', '&lt;'],
    ['&#62;', '&gt;'],   ['&#x3e;', '&gt;'],
    ['&#38;', '&amp;'],  ['&#x26;', '&amp;'],
    ['&#34;', '&quot;'], ['&#x22;', '&quot;'],
    ['&#39;', '&apos;'], ['&#x27;', '&apos;']
  ].each { |numeric, named| data.gsub!(numeric, named) }

  # A missing/nil type is treated as 'xml', i.e. the entities stay escaped.
  content_type = @contentparams['type'] || 'xml'
  if @contentparams.has_key?('type') && content_type !~ /xml$/u
    # Substitution order is significant (&amp; is decoded before &quot; and
    # &apos;), so it is kept exactly as listed.
    [
      ['&lt;', '<'],
      ['&gt;', '>'],
      ['&amp;', '&'],
      ['&quot;', '"'],
      ['&apos;', "'"]
    ].each { |named, char| data.gsub!(named, char) }
  end
  data
end

#feed ⇒ Object



117
118
119
# File 'lib/rfeedparser/parsers.rb', line 117

# Returns the value stored in @feeddata (the same variable that +feed=+
# assigns).
#
# @return [Object] the current contents of @feeddata
def feed       
  @feeddata
end

#feed=(data) ⇒ Object



121
122
123
# File 'lib/rfeedparser/parsers.rb', line 121

# Stores +data+ in @feeddata, the variable that +feed+ reads back.
#
# @param data [Object] the value to store
def feed=(data)
  @feeddata = data
end

#parse(data) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/rfeedparser/parsers.rb', line 135

# Pre-processes raw feed text and hands it to +sgml_feed+.
#
# Steps, in order:
# 1. Escape the "<" of any "<!" that does not start DOCTYPE, a comment ("--")
#    or a marked section ("[").
# 2. Expand self-closing tags (<foo/>) into <foo></foo>, except for names
#    listed in Elements_No_End_Tag, which are left untouched.
# 3. Decode the numeric references for the apostrophe (&#39;) and the double
#    quote (&#34;) into the literal characters.
# 4. If @encoding is set and non-empty, pass the text through +uconvert+.
#
# NOTE: +data+ is modified in place by the gsub! calls.
#
# @param data [String] mutable feed text
# @return [Object] whatever +sgml_feed+ returns
def parse(data)
  # Built from a string rather than a literal: getting around a Textmate indent bug.
  doctype_regexp = Regexp.new('<!((?!DOCTYPE|--|\[))', Regexp::IGNORECASE)
  data.gsub!(doctype_regexp, '&lt;!\1')

  data.gsub!(/<([^<\s]+?)\s*\/>/) do |tag|
    # Drop the leading "<" and trailing "/>" to get the bare tag text.
    clean = tag[1..-3].strip
    Elements_No_End_Tag.include?(clean) ? tag : "<#{clean}></#{clean}>"
  end

  data.gsub!('&#39;', "'")
  data.gsub!('&#34;', '"') # &#34; is the double quote, not the apostrophe
  if @encoding && !@encoding.empty? # FIXME unicode check type(u'')
    data = uconvert(data, 'utf-8', @encoding)
  end
  sgml_feed(data) # sgml_feed is an alias for a `feed` method
end

#reset ⇒ Object



130
131
132
133
# File 'lib/rfeedparser/parsers.rb', line 130

# Replaces @pieces with a fresh empty array, then defers to the superclass
# +reset+ (bare `super` forwards this method's own, empty, argument list).
def reset
  @pieces = []
  super
end

#sgml_feed ⇒ Object

feed needs to be mapped to feeddata, not to the SGMLParser method feed. I think.



116
# File 'lib/rfeedparser/parsers.rb', line 116

# Makes the `feed` method that is visible at this point also callable as
# `sgml_feed`; `parse` hands its pre-processed text to `sgml_feed`.
# NOTE(review): presumably this captures the inherited SGML parser `feed`
# before this class redefines `feed` as a reader for @feeddata - confirm
# against the statement order in parsers.rb.
alias :sgml_feed :feed