Module: RDF::RDFa::Reader::REXML
- Defined in:
- lib/rdf/rdfa/reader/rexml.rb
Overview
REXML implementation of an XML parser.
Defined Under Namespace
Classes: NodeProxy, NodeSetProxy
Class Method Summary (collapse)
-
+ (Symbol) library
Returns the name of the underlying XML library.
Instance Method Summary (collapse)
-
- (Object) detect_host_language_version(input, options)
Determine the host language and/or version from options and the input document.
-
- (String) doc_base(base)
Find value of document base.
-
- (Object) doc_errors
Document errors; this REXML implementation always returns an empty array.
-
- (void) initialize_xml(input, options = {})
Initializes the underlying XML library.
-
- (Object) root
Return proxy for document root.
Class Method Details
+ (Symbol) library
Returns the name of the underlying XML library.
14 15 16 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 14

##
# Returns the name of the underlying XML library.
#
# @return [Symbol] always `:rexml`
def self.library
  :rexml
end
Instance Method Details
- (Object) detect_host_language_version(input, options)
Determine the host language and/or version from options and the input document
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 188

##
# Determine the host language and/or version from options and the input document.
#
# Sets `@host_language` and `@version`. Explicit `:host_language` / `:version`
# options always win; anything still unset is sniffed from the DOCTYPE, the
# root element and any `<meta>` elements in the document head.
#
# @param [REXML::Document, #read, #to_s] input
#   a parsed document, an IO-like object (must also respond to `rewind`),
#   or anything whose `to_s` yields the markup
# @param [Hash{Symbol => Object}] options
#   reads `:host_language` and `:version`; `:encoding` is written when a
#   charset is declared in a `<meta>` element
# @return [Symbol, nil]
#   the detected host language, or nil when both values came from options
def detect_host_language_version(input, options)
  @host_language = options[:host_language] ? options[:host_language].to_sym : nil
  @version = options[:version] ? options[:version].to_sym : nil
  return if @host_language && @version

  # Sniff version based on input
  case input
  when ::REXML::Document
    # An empty document has no root element; fall back to empty strings
    # instead of raising NoMethodError on nil.
    doc_root = input.root
    doc_type_string = input.doctype.to_s
    version_attr = doc_root ? doc_root.attribute("version").to_s : ""
    root_element = doc_root ? doc_root.name.downcase : ""
    # NOTE(review): "application/xhtml+html" matches none of the `when`
    # branches below, so parsed documents are always classified by their root
    # element name. Possibly "application/xhtml+xml" was intended; confirm
    # before changing, as that would alter the result for SVG/XML documents.
    content_type = "application/xhtml+html" # FIXME: what about other possible XML types?
  else
    content_type = input.content_type if input.respond_to?(:content_type)

    # Determine from head of document
    head = if input.respond_to?(:read)
      input.rewind
      string = input.read(1000)
      input.rewind
      string.to_s
    else
      input.to_s[0..1000]
    end

    doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s
    # First tag that is neither a declaration (<!...) nor a processing
    # instruction (<?...)
    root_tag = head.match(%r(<[^!\?>]*>)m).to_s
    root_element = root_tag[%r(\A<([^\s>]+)), 1].to_s
    # Capture the whole quoted attribute value; it contains a space
    # (e.g. "XHTML+RDFa 1.0"), and whitespace around "=" is optional.
    version_attr = root_tag[/\sversion\s*=\s*(["'])(.*?)\1/m, 2].to_s
    head_element = head.match(%r(<head.*<\/head>)mi)
    head_doc = ::REXML::Document.new(head_element.to_s)

    # May determine content-type and/or charset from meta.
    # Easiest way is to parse the head into a document and iterate
    # over the matching elements.
    ::REXML::XPath.each(head_doc, "//meta") do |meta|
      if meta.attribute("http-equiv").to_s.downcase == 'content-type'
        media_type, *params = meta.attribute("content").to_s.downcase.split(";")
        media_type = media_type.to_s.strip
        # An empty content attribute must not discard a known content type
        content_type = media_type unless media_type.empty?
        charset = params.map { |param| param[/charset=([^\s]*)$/i, 1] }.compact.first
        options[:encoding] = charset.downcase if charset
      elsif meta.attribute("charset")
        # REXML::Element has no #attr method; read the attribute value directly
        options[:encoding] = meta.attribute("charset").to_s.downcase
      end
    end
  end

  # Already using XML parser, determine from DOCTYPE and/or root element
  @version ||= :"rdfa1.0" if doc_type_string =~ /RDFa 1\.0/
  @version ||= :"rdfa1.0" if version_attr =~ /RDFa 1\.0/
  @version ||= :"rdfa1.1" if version_attr =~ /RDFa 1\.1/
  @version ||= :"rdfa1.1"

  @host_language ||= case content_type
  when "application/xml" then :xml
  when "image/svg+xml" then :svg
  when "text/html"
    case doc_type_string
    when /html 4/i then :html4
    when /xhtml/i then :xhtml1
    when /html/i then :html5
    else :html5
    end
  when "application/xhtml+xml"
    case doc_type_string
    when /html 4/i then :html4
    when /xhtml/i then :xhtml1
    when /html/i then :xhtml5
    else :xhtml5
    end
  else
    case root_element
    when /svg/i then :svg
    when /html/i then :html5
    else :xml
    end
  end
end
- (String) doc_base(base)
Find value of document base
286 287 288 289 290 291 292 293 294 295 296 297 298 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 286

##
# Find value of document base.
#
# For HTML-family host languages the `href` of `/html/head/base` is used,
# with any fragment removed; for other host languages the root element's
# `base` attribute in the XML namespace is used. When the document declares
# no usable base, the supplied `base` is returned, then `@base_uri`.
#
# @param [String, nil] base
#   existing base, used when the document does not declare one
# @return [String]
def doc_base(base)
  # find if the document has a base element
  case @host_language
  when :xhtml1, :xhtml5, :html4, :html5
    # Best-effort lookup: any failure is treated as "no base element"
    base_el = begin
      ::REXML::XPath.first(@doc, "/html/head/base")
    rescue StandardError
      nil
    end
    if base_el
      # A <base> without href (e.g. only a target attribute), or one holding
      # just a fragment, must not replace the caller's base with nil or "".
      href = base_el.attribute("href").to_s.split("#").first.to_s
      base = href unless href.empty?
    end
  else
    xml_base = root.attribute("base", RDF::XML.to_s) if root
    base = xml_base if xml_base
  end

  base || @base_uri
end
- (Object) doc_errors
Document errors; this REXML implementation always returns an empty array.
277 278 279 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 277

##
# Document errors.
#
# @return [Array] always empty in this implementation
def doc_errors
  []
end
- (void) initialize_xml(input, options = {})
This method returns an undefined value.
Initializes the underlying XML library.
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 167

##
# Initializes the underlying XML library.
#
# Stores the document in `@doc`: an REXML::Document is used as given; any
# other input is read (via `read` when available, otherwise `to_s`) and
# parsed as XML.
#
# @param [REXML::Document, #read, #to_s] input
# @param [Hash{Symbol => Object}] options
#   `:encoding` is filled in from `input.charset` when available, otherwise
#   it defaults to 'utf-8' (only for input that is not already a document)
# @return [void]
def initialize_xml(input, options = {})
  # Deliberately lazy-loaded. `require` is a no-op once the file is loaded,
  # so no `defined?(::REXML)` guard is needed; such a guard would skip the
  # load when only part of REXML had been required.
  require 'rexml/document'

  @doc = case input
  when ::REXML::Document
    # NOTE(review): @base_uri is only assigned in the other branch, so it is
    # left untouched for pre-parsed documents; confirm this is intended.
    input
  else
    # Try to detect charset from input
    options[:encoding] ||= input.charset if input.respond_to?(:charset)

    # Otherwise, default is utf-8
    options[:encoding] ||= 'utf-8'

    # Remember the reader's base URI as a string, if defined
    @base_uri = base_uri ? base_uri.to_s : nil

    # Only parse as XML, no HTML mode
    ::REXML::Document.new(input.respond_to?(:read) ? input.read : input.to_s)
  end
end
- (Object) root
Return proxy for document root
271 272 273 |
# File 'lib/rdf/rdfa/reader/rexml.rb', line 271

##
# Return proxy for document root.
#
# The proxy is memoized in `@root`, so it is built once per reader.
#
# @return [NodeProxy, nil] nil when there is no document or it has no root element
def root
  @root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
end