Module: RDF::RDFa::Reader::REXML

Defined in:
lib/rdf/rdfa/reader/rexml.rb

Overview

REXML implementation of an XML parser.

See Also:

Defined Under Namespace

Classes: NodeProxy, NodeSetProxy

Class Method Summary (collapse)

Instance Method Summary (collapse)

Class Method Details

+ (Symbol) library

Returns the name of the underlying XML library.

Returns:

  • (Symbol)


14
15
16
# File 'lib/rdf/rdfa/reader/rexml.rb', line 14

# Identifies the XML library that backs this reader implementation.
#
# @return [Symbol] always +:rexml+
def self.library
  :rexml
end

Instance Method Details

- (Object) detect_host_language_version(input, options)

Determine the host language and/or version from options and the input document



188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'lib/rdf/rdfa/reader/rexml.rb', line 188

# Determine the host language and/or RDFa version from options and the input
# document, storing the results in @host_language and @version.
#
# Explicit +:host_language+ / +:version+ options win; when both are given no
# sniffing is performed. Otherwise the DOCTYPE, the root element (name and
# +version+ attribute) and any +<meta>+ content-type/charset declarations
# found in the first ~1000 characters are inspected.
#
# @param input [REXML::Document, #read, #to_s]
#   an already parsed document, a rewindable stream, or anything convertible
#   to a String
# @param options [Hash{Symbol => Object}]
#   reads +:host_language+ and +:version+; +:encoding+ is written when a
#   +<meta>+ element declares a charset
# @return [Symbol, nil] the resulting host language, or +nil+ when both
#   +:host_language+ and +:version+ were supplied
def detect_host_language_version(input, options)
  @host_language = options[:host_language] ? options[:host_language].to_sym : nil
  @version = options[:version] ? options[:version].to_sym : nil
  return if @host_language && @version

  # Sniff version based on input
  case input
  when ::REXML::Document
    # An empty document has no root element; treat it as having no name and
    # no version attribute instead of raising NoMethodError.
    doc_root = input.root
    doc_type_string = input.doctype.to_s
    version_attr = doc_root ? doc_root.attribute("version").to_s : ""
    root_element = doc_root ? doc_root.name.downcase : ""
    # FIXME: what about other possible XML types?
    # NOTE(review): this value matches none of the content types tested below,
    # so parsed documents are always classified by root element name. It looks
    # like a typo for "application/xhtml+xml", but correcting it would change
    # detection for SVG/XML documents, so it is left as-is.
    content_type = "application/xhtml+html"
  else
    content_type = input.content_type if input.respond_to?(:content_type)

    # Determine from head of document
    head = if input.respond_to?(:read)
      input.rewind
      string = input.read(1000)
      input.rewind
      string.to_s
    else
      input.to_s[0..1000]
    end

    doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s
    root = head.match(%r(<[^!\?>]*>)m).to_s
    root_element = root.match(%r(^<(\S+)[ >])) ? $1 : ""
    # Capture the whole quoted attribute value (single or double quotes,
    # optional whitespace around "=") so values such as "XHTML+RDFa 1.0",
    # which contain a space, are seen in full.
    version_match = root.match(/\sversion\s*=\s*(["'])(.*?)\1/m)
    version_attr = version_match ? version_match[2] : ""
    head_element = head.match(%r(<head.*<\/head>)mi)
    head_doc = ::REXML::Document.new(head_element.to_s)

    # May determine content-type and/or charset from meta.
    # Easiest way is to parse head into a document and iterate
    # over the matching meta elements.
    ::REXML::XPath.each(head_doc, "//meta") do |meta|
      if meta.attribute("http-equiv").to_s.downcase == 'content-type'
        content_type, charset_param = meta.attribute("content").to_s.downcase.split(";")
        options[:encoding] = $1.downcase if charset_param.to_s =~ /charset=([^\s]*)$/i
      elsif meta.attribute("charset")
        options[:encoding] = meta.attribute("charset").to_s.downcase
      end
    end
  end

  # Already using XML parser, determine from DOCTYPE and/or root element.
  # The first rule that matches wins; RDFa 1.1 is the default.
  @version ||= :"rdfa1.0" if doc_type_string =~ /RDFa 1\.0/
  @version ||= :"rdfa1.0" if version_attr =~ /RDFa 1\.0/
  @version ||= :"rdfa1.1" if version_attr =~ /RDFa 1\.1/
  @version ||= :"rdfa1.1"

  @host_language ||= case content_type
  when "application/xml"  then :xml
  when "image/svg+xml"    then :svg
  when "text/html"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :html5
    else                       :html5
    end
  when "application/xhtml+xml"
    case doc_type_string
    when /html 4/i        then :html4
    when /xhtml/i         then :xhtml1
    when /html/i          then :xhtml5
    else                       :xhtml5
    end
  else
    # Unknown or missing content type: fall back to the root element name.
    case root_element
    when /svg/i           then :svg
    when /html/i          then :html5
    else                       :xml
    end
  end
end

- (String) doc_base(base)

Find value of document base

Parameters:

  • base (String)

    Existing base from URI or :base_uri

Returns:



286
287
288
289
290
291
292
293
294
295
296
297
298
# File 'lib/rdf/rdfa/reader/rexml.rb', line 286

# Find the value of the document base.
#
# For HTML-family host languages the +href+ of +/html/head/base+ (with any
# fragment removed) replaces +base+ when such an element exists. For every
# other host language the root element's +base+ attribute, looked up with
# +RDF::XML.to_s+ as second argument, replaces it when present.
#
# @param base [String] existing base from URI or :base_uri
# @return [String, nil] the resolved base, falling back to @base_uri
def doc_base(base)
  if [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language)
    # Any error raised by the XPath lookup is treated as "no <base> element".
    element = begin
      ::REXML::XPath.first(@doc, "/html/head/base")
    rescue
      nil
    end
    base = element.attribute("href").to_s.split("#").first if element
  else
    declared = root ? root.attribute("base", RDF::XML.to_s) : nil
    base = declared if declared
  end

  base || @base_uri
end

- (Object) doc_errors

Document errors



277
278
279
# File 'lib/rdf/rdfa/reader/rexml.rb', line 277

# Document errors.
#
# This implementation collects none: every call returns a new, empty Array.
#
# @return [Array] always empty
def doc_errors
  []
end

- (void) initialize_xml(input, options = {})

This method returns an undefined value.

Initializes the underlying XML library.

Parameters:

  • options (Hash{Symbol => Object}) (defaults to: {})


167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/rdf/rdfa/reader/rexml.rb', line 167

# Initializes the underlying XML library and stores the document in @doc.
#
# An existing REXML::Document is used as-is. Any other input is read (via
# +#read+ when available, otherwise +#to_s+) and parsed as XML; in that case
# +options[:encoding]+ is defaulted and @base_uri is refreshed from
# +base_uri+.
#
# @param input [REXML::Document, #read, #to_s]
# @param options [Hash{Symbol => Object}] (defaults to: {})
#   +:encoding+ is filled in from +input.charset+ when available, else 'utf-8'
# @return [REXML::Document] the document assigned to @doc
def initialize_xml(input, options = {})
  # Lazy-load REXML only when nothing has defined it yet.
  require 'rexml/document' unless defined?(::REXML)

  case input
  when ::REXML::Document
    @doc = input
  else
    # Prefer the charset reported by the input; otherwise default to utf-8.
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    options[:encoding] ||= 'utf-8'

    # Set xml:base for the document element, if defined
    @base_uri = base_uri ? base_uri.to_s : nil

    # Only parse as XML, no HTML mode
    markup = input.respond_to?(:read) ? input.read : input.to_s
    @doc = ::REXML::Document.new(markup)
  end
end

- (Object) root

Return proxy for document root



271
272
273
# File 'lib/rdf/rdfa/reader/rexml.rb', line 271

# Return proxy for document root.
#
# The proxy is created on first use and cached in @root. Nothing is returned
# (and nothing is cached) while @doc is unset or has no root element.
#
# @return [NodeProxy, nil]
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end