Module: RDF::Microdata::Reader::Nokogiri

Defined in:
lib/rdf/microdata/reader/nokogiri.rb

Overview

Nokogiri implementation of an HTML parser.

Defined Under Namespace

Classes: NodeProxy, NodeSetProxy

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.library ⇒ Symbol

Returns the name of the underlying XML library.


12
13
14
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 12

# Returns the name of the underlying XML library.
#
# @return [Symbol] always +:nokogiri+
def self.library
  :nokogiri
end

Instance Method Details

#doc_base(base) ⇒ String

Find value of document base


223
224
225
226
227
228
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 223

# Find the value of the document base.
#
# Looks for an +html>head>base+ element in +@doc+. When it carries a usable
# +href+, that value (with any fragment identifier removed) replaces the
# supplied base; otherwise the supplied base is returned unchanged.
#
# @param base [Object] fallback base, returned as-is when the document
#   does not declare a usable one
# @return [String, Object] the document-declared base, or +base+
def doc_base(base)
  base_el = @doc.at_css("html>head>base")
  return base unless base_el

  # Drop the fragment part. A missing href yields nil here ("".split => []),
  # and a fragment-only href ("#x") yields "".
  href = base_el.attribute("href").to_s.split("#").first

  # A <base> without a usable href (e.g. only a target attribute) must not
  # wipe out the base supplied by the caller.
  href.nil? || href.empty? ? base : href
end

#doc_errors ⇒ Object

Document errors


214
215
216
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 214

# Document errors.
#
# Returns the entries of +@doc.errors+ whose string form does not match the
# "doctype must be the first token" message; that one message is dropped.
#
# @return [Array] the remaining errors
def doc_errors
  @doc.errors.select do |error|
    error.to_s !~ /The doctype must be the first token in the document/
  end
end

#find_element_by_id(id) ⇒ Object

Look up an element in the document by id


240
241
242
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 240

# Look up an element in the document by id.
#
# The id is passed as an XPath variable binding instead of being
# interpolated into a CSS selector, so ids containing selector
# metacharacters (".", ":", "[", whitespace, ...), starting with a digit, or
# being empty are compared literally against the +id+ attribute rather than
# producing a wrong or unparsable selector.
#
# @param id [#to_s] value of the +id+ attribute to search for
# @return [NodeProxy, nil] proxy for the first matching element, or a falsy
#   value when there is none
def find_element_by_id(id)
  (e = @doc.at_xpath("//*[@id=$id]", nil, { id: id.to_s })) && NodeProxy.new(e)
end

#getItems ⇒ Object

Based on Microdata element.getItems


234
235
236
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 234

# Based on Microdata element.getItems.
#
# Collects every element in +@doc+ that has an +itemscope+ attribute but no
# +itemprop+ attribute, and wraps each one in a NodeProxy.
#
# @return [Array<NodeProxy>]
def getItems
  scoped = @doc.css('[itemscope]')
  top_level = scoped.reject { |node| node.has_attribute?('itemprop') }
  top_level.map { |node| NodeProxy.new(node) }
end

#initialize_html(input, options = {}) ⇒ void

This method returns an undefined value.

Initializes the underlying XML library.


181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 181

# Initializes the underlying XML library and parses +input+ into +@doc+.
#
# An existing Nokogiri document is used as-is. Anything else is parsed with
# the HTML5 parser, falling back to +Nokogiri::HTML.parse+ when loading the
# HTML5 parser raises LoadError.
#
# The encoding is taken from +options[:encoding]+, then from +input.charset+
# when +input+ responds to it, and finally defaults to 'utf-8'. The resolved
# value is stored back into +options[:encoding]+ as a String.
#
# @param input [Nokogiri::XML::Document, #read, String] document or source
# @param options [Hash]
# @option options [#to_s] :encoding character encoding of +input+
# @return [void]
def initialize_html(input, options = {})
  require 'nokogiri' unless defined?(::Nokogiri)
  @doc = case input
  when ::Nokogiri::XML::Document
    input
  else
    # Try to detect charset from input; otherwise, default is utf-8
    options[:encoding] ||= input.charset if input.respond_to?(:charset)
    options[:encoding] = (options[:encoding] || 'utf-8').to_s
    encoding = options[:encoding]

    begin
      # Only load nokogumbo when Nokogiri does not already provide HTML5;
      # otherwise a missing nokogumbo gem would needlessly force the
      # fallback parser below.
      require 'nokogumbo' unless ::Nokogiri.respond_to?(:HTML5)

      # Work on our own String: force_encoding mutates its receiver, which
      # would alter the caller's string and raise on frozen input.
      html = input.respond_to?(:read) ? input.read : input.dup
      ::Nokogiri::HTML5(html.force_encoding(encoding))
    rescue LoadError
      # html is nil when the require itself failed, i.e. input is still unread
      ::Nokogiri::HTML.parse(html || input, base_uri.to_s, encoding)
    end
  end
end

#root ⇒ Object

Return proxy for document root


208
209
210
# File 'lib/rdf/microdata/reader/nokogiri.rb', line 208

# Return proxy for document root.
#
# Yields nil whenever +@doc+ is unset or has no root; otherwise the
# NodeProxy wrapping +@doc.root+ is created once and cached in +@root+.
#
# @return [NodeProxy, nil]
def root
  return unless @doc && @doc.root

  @root ||= NodeProxy.new(@doc.root)
end