Module: Nokogiri::HTML5

Defined in:
lib/nokogumbo/html5.rb,
lib/nokogumbo/html5/node.rb,
lib/nokogumbo/html5/document.rb,
lib/nokogumbo/html5/document_fragment.rb

Defined Under Namespace

Modules: Node Classes: Document, DocumentFragment

Constant Summary collapse

HTML_NAMESPACE =

HTML uses the XHTML namespace.

'http://www.w3.org/1999/xhtml'.freeze
MATHML_NAMESPACE =
'http://www.w3.org/1998/Math/MathML'.freeze
SVG_NAMESPACE =
'http://www.w3.org/2000/svg'.freeze
XLINK_NAMESPACE =
'http://www.w3.org/1999/xlink'.freeze
XML_NAMESPACE =
'http://www.w3.org/XML/1998/namespace'.freeze
XMLNS_NAMESPACE =
'http://www.w3.org/2000/xmlns/'.freeze

Class Method Summary collapse

Class Method Details

.fragment(string, encoding = nil, **options) ⇒ Object

Parse a fragment from string. Convenience method for Nokogiri::HTML5::DocumentFragment.parse.



27
28
29
# File 'lib/nokogumbo/html5.rb', line 27

# Parse +string+ as an HTML5 fragment.
#
# string   - the markup to parse; handed on unchanged.
# encoding - handed on unchanged (nil when omitted).
# options  - keyword options; they are collected into a single Hash and
#            passed to DocumentFragment.parse as its third positional
#            argument (not re-splatted as keywords).
#
# Returns whatever DocumentFragment.parse returns.
def self.fragment(string, encoding = nil, **options)
  DocumentFragment.parse(string, encoding, options)
end

.get(uri, options = {}) ⇒ Object

Fetch and parse an HTML document from the web, following redirects, handling https, and determining the character encoding using HTML5 rules. uri may be a String or a URI. options contains HTTP headers and special options. Everything which is not a special option is considered a header. Special options include:

* :follow_limit => number of redirects which are followed
* :basic_auth => [username, password]


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/nokogumbo/html5.rb', line 38

# Fetch a document over HTTP(S) and parse it with +parse+.
#
# uri     - a String or URI naming the resource to fetch.
# options - a Hash mixing three kinds of entries:
#           * the special options :follow_limit and :basic_auth,
#           * any key for which the Net::HTTP connection has a "key="
#             writer (applied to the connection),
#           * everything else, which is sent as a request header.
#           A bare Numeric is still accepted (deprecated) and is treated
#           as {:follow_limit => number}.
#
# Returns the parsed document; the Net::HTTP response is reachable
# through +doc.response+. Non-success responses, and redirects once the
# follow limit is used up, are reported via Net::HTTPResponse#value.
def self.get(uri, options={})
  # Deprecated calling convention: get(uri, follow_limit). Normalize
  # +options+ itself (not only the header copy), because it is iterated,
  # forwarded to +parse+ and merged for redirects further down.
  options = {:follow_limit => options} if Numeric === options

  headers = options.clone

  # Always remove :follow_limit so that a nil/false value is never sent
  # as an HTTP header; only a truthy value overrides the default of 10.
  follow_limit = headers.delete(:follow_limit)
  limit = follow_limit ? follow_limit.to_i : 10

  # Loaded lazily so that net/http is only required when get is used.
  require 'net/http'
  uri = URI(uri) unless URI === uri

  http = Net::HTTP.new(uri.host, uri.port)

  # TLS / SSL support
  http.use_ssl = true if uri.scheme == 'https'

  # Pass through Net::HTTP override values, which currently include:
  #   :ca_file, :ca_path, :cert, :cert_store, :ciphers,
  #   :close_on_empty_response, :continue_timeout, :key, :open_timeout,
  #   :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
  #   :verify_callback, :verify_depth, :verify_mode
  # Each consumed key is removed from +headers+ so it is not also sent
  # as a request header.
  options.each_key do |key|
    writer = "#{key}="
    http.public_send(writer, headers.delete(key)) if http.respond_to?(writer)
  end

  request = Net::HTTP::Get.new(uri.request_uri)

  # basic authentication: an explicit :basic_auth pair wins over
  # credentials embedded in the URI.
  auth = headers.delete(:basic_auth)
  auth ||= [uri.user, uri.password] if uri.user && uri.password
  request.basic_auth auth.first, auth.last if auth

  # remaining options are treated as headers
  headers.each { |key, value| request[key.to_s] = value.to_s }

  response = http.request(request)

  case response
  when Net::HTTPSuccess
    # Splat the options: +parse+ takes (string, url, encoding, **options),
    # so a positional Hash would be bound to +url+ on Ruby 3 and later.
    doc = parse(reencode(response.body, response['content-type']), **options)
    doc.instance_variable_set('@response', response)
    doc.class.send(:attr_reader, :response)
    doc
  when Net::HTTPRedirection
    # Out of redirects: let Net::HTTP raise for the 3xx response.
    response.value if limit <= 1
    location = URI.join(uri, response['location'])
    # NOTE(review): all options, including :basic_auth and custom headers,
    # are forwarded unchanged to the redirect target even when it is on a
    # different host. Confirm this is intended.
    get(location, options.merge(:follow_limit => limit-1))
  else
    response.value
  end
end

.parse(string, url = nil, encoding = nil, **options, &block) ⇒ Object

Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse



21
22
23
# File 'lib/nokogumbo/html5.rb', line 21

# Parse an HTML5 document.
#
# string   - the markup to parse; handed on unchanged.
# url      - handed on unchanged (nil when omitted).
# encoding - handed on unchanged (nil when omitted).
# options  - keyword options, re-splatted as keywords to Document.parse.
# block    - optional block, forwarded as-is to Document.parse.
#
# Returns whatever Document.parse returns.
def self.parse(string, url = nil, encoding = nil, **options, &block)
  Document.parse(string, url, encoding, **options, &block)
end