Class: Syndication::Atom::Data

Inherits:
Container show all
Defined in:
lib/syndication/atom.rb

Overview

This object has to be handled specially: the parser feeds in all the REXML events so that the object can reconstruct embedded XML/XHTML. (Normally, the parser handles text buffering for a Container and calls store() when the container’s element is closed.)

Direct Known Subclasses

Content

Constant Summary collapse

ENTITIES =

Table of entities ripped from the XHTML spec.

# Keys are bare entity names (no leading '&' or trailing ';'). Values are
# integer character codes: 34, 38, 60 and 62 for quot, amp, lt and gt, and
# codes in the range 160..255 for every other entry.
{
  'Aacute' => 193, 'aacute' => 225, 'Acirc'  => 194,
  'acirc'  => 226, 'acute'  => 180, 'AElig'  => 198,
  'aelig'  => 230, 'Agrave' => 192, 'agrave' => 224,
  'amp'    => 38,  'Aring'  => 197, 'aring'  => 229,
  'Atilde' => 195, 'atilde' => 227, 'Auml'   => 196,
  'auml'   => 228, 'brvbar' => 166, 'Ccedil' => 199,
  'ccedil' => 231, 'cedil'  => 184, 'cent'   => 162,
  'copy'   => 169, 'curren' => 164, 'deg'    => 176,
  'divide' => 247, 'Eacute' => 201, 'eacute' => 233,
  'Ecirc'  => 202, 'ecirc'  => 234, 'Egrave' => 200,
  'egrave' => 232, 'ETH'    => 208, 'eth'    => 240,
  'Euml'   => 203, 'euml'   => 235, 'frac12' => 189,
  'frac14' => 188, 'frac34' => 190, 'gt'     => 62,
  'Iacute' => 205, 'iacute' => 237, 'Icirc'  => 206,
  'icirc'  => 238, 'iexcl'  => 161, 'Igrave' => 204,
  'igrave' => 236, 'iquest' => 191, 'Iuml'   => 207,
  'iuml'   => 239, 'laquo'  => 171, 'lt'     => 60,
  'macr'   => 175, 'micro'  => 181, 'middot' => 183,
  'nbsp'   => 160, 'not'    => 172, 'Ntilde' => 209,
  'ntilde' => 241, 'Oacute' => 211, 'oacute' => 243,
  'Ocirc'  => 212, 'ocirc'  => 244, 'Ograve' => 210,
  'ograve' => 242, 'ordf'   => 170, 'ordm'   => 186,
  'Oslash' => 216, 'oslash' => 248, 'Otilde' => 213,
  'otilde' => 245, 'Ouml'   => 214, 'ouml'   => 246,
  'para'   => 182, 'plusmn' => 177, 'pound'  => 163,
  'quot'   => 34,  'raquo'  => 187, 'reg'    => 174,
  'sect'   => 167, 'shy'    => 173, 'sup1'   => 185,
  'sup2'   => 178, 'sup3'   => 179, 'szlig'  => 223,
  'THORN'  => 222, 'thorn'  => 254, 'times'  => 215,
  'Uacute' => 218, 'uacute' => 250, 'Ucirc'  => 219,
  'ucirc'  => 251, 'Ugrave' => 217, 'ugrave' => 249,
  'uml'    => 168, 'Uuml'   => 220, 'uuml'   => 252,
  'Yacute' => 221, 'yacute' => 253, 'yen'    => 165,
  'yuml'   => 255
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Container

#parse_date, #store, #store_category, #strip, #tag2method

Constructor Details

#initialize(parent, tag, attrs = nil) ⇒ Data

Returns a new instance of Data.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/syndication/atom.rb', line 89

# Create a Data container for the element +tag+, remembering +parent+ so
# that tag_end can hand control back to it.
#
# +attrs+ is indexed by attribute name and only its 'type' entry is read.
# It may be nil (the parameter's default), in which case the type stays
# 'text'. A buffer (@xhtml, @html or @text) is created only for those three
# types; any other type value leaves all three buffers unset.
def initialize(parent, tag, attrs = nil)
  @tag = tag
  @parent = parent
  # 'text' is the default, as per the standard. Guard against attrs being
  # nil so the two-argument form does not raise NoMethodError.
  @type = (attrs && attrs['type']) || 'text'
  # NOTE(review): tag_end tracks stripping via @div_stripped, not this
  # variable; kept as-is so existing state is unchanged.
  @div_trimmed = false
  case @type
  when 'xhtml'
    @xhtml = ''
  when 'html'
    @html = ''
  when 'text'
    @text = ''
  end
end

Instance Attribute Details

#data ⇒ Object (readonly)

The decoded data, if the type is not text or XML



49
50
51
# File 'lib/syndication/atom.rb', line 49

# Reader for the @data instance variable.
# NOTE(review): nothing in the visible part of this class assigns @data;
# presumably it is populated elsewhere (e.g. by a subclass) for content that
# is neither text nor (X)HTML. Verify against the rest of the file.
def data
  @data
end

Instance Method Details

#html ⇒ Object

Return value of Data as HTML.



127
128
129
130
131
132
# File 'lib/syndication/atom.rb', line 127

# Value of this Data as HTML.
#
# Preference order: the HTML buffer, then the XHTML buffer (returned
# unchanged), then the text buffer escaped through text2html. Returns nil
# when none of the three buffers is set.
def html
  @html || @xhtml || (text2html(@text) if @text)
end

#html2text(html) ⇒ Object

Convert an HTML representation to text. This is done by throwing away all tags and converting all entities. Not ideal, but I can’t think of a better simple approach.



118
119
120
121
122
123
124
# File 'lib/syndication/atom.rb', line 118

# Convert an HTML representation to text by throwing away all tags and
# converting named entities.
#
# html:: the markup to convert (not modified).
#
# Returns a new string. A named entity found in ENTITIES is replaced by the
# character for its code; an unknown named entity is removed. Numeric
# character references (e.g. "&#233;") are not matched and pass through
# unchanged.
def html2text(html)
  stripped = html.gsub(/<[^>]*>/, '')
  # Capture only the name between '&' and ';' -- ENTITIES is keyed by the
  # bare name, so looking up the whole match would never succeed.
  stripped.gsub(/&(\w+);/) do
    code = ENTITIES[Regexp.last_match(1)]
    # ENTITIES stores integer codes; turn the code into its character.
    code ? [code].pack('U') : ''
  end
end

#tag_end(endtag, current) ⇒ Object

Catch tag end events if we’re collecting embedded XHTML.



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/syndication/atom.rb', line 164

# Catch tag end events if we're collecting embedded XHTML.
#
# endtag::  name of the element being closed.
# current:: not used here directly; forwarded to super for non-XHTML data.
#
# When +endtag+ closes this object's own element, the wrapper <div> is
# stripped from XHTML content (once only) and @parent is returned. For any
# other end tag inside XHTML content the closing tag is appended to the
# buffer, minus any "xhtml:" prefix, and self is returned. For non-XHTML
# data the event is delegated to super.
def tag_end(endtag, current)
  if @tag == endtag
    if @type == 'xhtml' && !defined?(@div_stripped)
      # Anchor to the whole buffer (\A / \z), not to a line: with ^ / $ the
      # second substitution could remove an inner </div> that happens to end
      # an earlier line, leaving the outer wrapper's </div> behind.
      @xhtml.sub!(/\A\s*<div>\s*/, '')
      @xhtml.sub!(/\s*<\/div>\s*\z/, '')
      @div_stripped = true
    end
    return @parent
  end
  return super unless @type == 'xhtml'

  t = endtag.sub(/^xhtml:/, '')
  @xhtml += "</#{t}>"
  self
end

#tag_start(tag, attrs = nil) ⇒ Object

Catch tag start events if we’re collecting embedded XHTML.



154
155
156
157
158
159
160
161
# File 'lib/syndication/atom.rb', line 154

# Catch tag start events if we're collecting embedded XHTML.
#
# For XHTML content an opening tag for +tag+ (with any leading "xhtml:"
# prefix removed) is appended to the buffer; +attrs+ is not written out.
# For every other type the event is passed on to super.
def tag_start(tag, attrs = nil)
  return super unless @type == 'xhtml'

  element = tag.sub(/^xhtml:/, '')
  @xhtml += "<#{element}>"
end

#text(s) ⇒ Object

Store/buffer text in the appropriate internal field.



183
184
185
186
187
188
189
190
191
192
# File 'lib/syndication/atom.rb', line 183

# Store/buffer text in the appropriate internal field.
#
# Appends +s+ to the buffer named after the current type (@xhtml, @html or
# @text) and returns the new buffer contents. For any other type nothing is
# stored and nil is returned.
def text(s)
  return unless %w[xhtml html text].include?(@type)

  buffer = "@#{@type}"
  instance_variable_set(buffer, instance_variable_get(buffer) + s)
end

#text2html(text) ⇒ Object

Convert a text representation to HTML.



108
109
110
111
112
113
# File 'lib/syndication/atom.rb', line 108

# Convert a text representation to HTML.
#
# Escapes '&', '<' and '>' as character entities. The ampersand is handled
# first so the entities introduced for '<' and '>' are not escaped again.
# Returns a new string; +text+ itself is left untouched.
def text2html(text)
  text.gsub('&', '&amp;').gsub('<', '&lt;').gsub('>', '&gt;')
end

#txt ⇒ Object

Return value of Data as ASCII text. If the field started off as (X)HTML, this is done by ruthlessly discarding markup and entities, so it is highly recommended that you use the XHTML or HTML and convert to text in a more intelligent way.



138
139
140
141
142
143
# File 'lib/syndication/atom.rb', line 138

# Value of this Data as plain text.
#
# The text buffer is returned as-is when present. Otherwise the XHTML
# buffer, or failing that the HTML buffer, is run through html2text.
# Returns nil when no buffer is set.
def txt
  return @text if @text

  markup = @xhtml || @html
  markup ? html2text(markup) : nil
end

#xhtml ⇒ Object

Return value of Data as XHTML.



146
147
148
149
150
151
# File 'lib/syndication/atom.rb', line 146

# Value of this Data as XHTML.
#
# Preference order: the XHTML buffer, then the HTML buffer (returned
# unchanged), then the text buffer escaped through text2html. Returns nil
# when none of the three buffers is set.
def xhtml
  @xhtml || @html || (text2html(@text) if @text)
end