Class: Nokogiri::HTML5::Document

Inherits:
Nokogiri::HTML4::Document show all
Defined in:
lib/nokogiri/html5/document.rb,
ext/nokogiri/gumbo.c

Overview

Since v1.12.0

💡 HTML5 functionality is not available when running JRuby.

Constant Summary

Constants inherited from XML::Document

XML::Document::NCNAME_CHAR, XML::Document::NCNAME_RE, XML::Document::NCNAME_START_CHAR

Constants inherited from XML::Node

XML::Node::ATTRIBUTE_DECL, XML::Node::ATTRIBUTE_NODE, XML::Node::CDATA_SECTION_NODE, XML::Node::COMMENT_NODE, XML::Node::DOCB_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE, XML::Node::DOCUMENT_NODE, XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE, XML::Node::ELEMENT_DECL, XML::Node::ELEMENT_NODE, XML::Node::ENTITY_DECL, XML::Node::ENTITY_NODE, XML::Node::ENTITY_REF_NODE, XML::Node::HTML_DOCUMENT_NODE, XML::Node::NAMESPACE_DECL, XML::Node::NOTATION_NODE, XML::Node::PI_NODE, XML::Node::TEXT_NODE, XML::Node::XINCLUDE_END, XML::Node::XINCLUDE_START

Constants included from ClassResolver

ClassResolver::VALID_NAMESPACES

Constants included from XML::Searchable

XML::Searchable::LOOKS_LIKE_XPATH

Constants included from XML::PP::Node

XML::PP::Node::COLLECTIONS

Instance Attribute Summary collapse

Attributes inherited from XML::Document

#errors, #namespace_inheritance

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Nokogiri::HTML4::Document

#meta_encoding, #meta_encoding=, new, #serialize, #title, #title=, #type

Methods inherited from XML::Document

#add_child, #canonicalize, #collect_namespaces, #create_cdata, #create_comment, #create_element, #create_entity, #create_text_node, #decorate, #decorators, #document, #dup, #encoding, #encoding=, #name, #namespaces, new, #remove_namespaces!, #root, #root=, #slop!, #validate, #version

Methods inherited from XML::Node

#<<, #<=>, #==, #[], #[]=, #accept, #add_child, #add_class, #add_namespace_definition, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #append_class, #attribute, #attribute_nodes, #attribute_with_ns, #attributes, #before, #blank?, #canonicalize, #cdata?, #child, #children, #children=, #classes, #comment?, #content, #content=, #create_external_subset, #create_internal_subset, #css_path, #decorate!, #default_namespace=, #description, #do_xinclude, #document, #document?, #dup, #each, #element?, #element_children, #encode_special_chars, #external_subset, #first_element_child, #fragment?, #html?, #inner_html, #inner_html=, #internal_subset, #key?, #keys, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #lang, #lang=, #last_element_child, #line, #line=, #matches?, #namespace, #namespace=, #namespace_definitions, #namespace_scopes, #namespaced_key?, #namespaces, #native_content=, new, #next_element, #next_sibling, #node_name, #node_name=, #node_type, #parent, #parent=, #parse, #path, #pointer_id, #prepend_child, #previous_element, #previous_sibling, #processing_instruction?, #read_only?, #remove_attribute, #remove_class, #replace, #serialize, #swap, #text?, #to_html, #to_s, #to_xhtml, #traverse, #unlink, #value?, #values, #wrap, #write_html_to, #write_to, #write_xhtml_to, #write_xml_to, #xml?

Methods included from ClassResolver

#related_class

Methods included from XML::Searchable

#>, #at, #at_css, #at_xpath, #css, #search, #xpath

Methods included from XML::PP::Node

#inspect, #pretty_print

Constructor Details

#initialize(*args) ⇒ Document

:nodoc:


110
111
112
113
# File 'lib/nokogiri/html5/document.rb', line 110

# Build a new document. The bare +super+ forwards every argument (and any
# block) unchanged to the superclass initializer; afterwards the URL slot is
# reset so that #url returns +nil+ on a freshly constructed document.
def initialize(*args) # :nodoc:
  super
  # NOTE(review): presumably filled in later by the parse entry points - confirm.
  @url = nil
end

Instance Attribute Details

#url ⇒ Object (readonly)

Get the URL of this document, as passed into Document.parse, Document.read_io, or Document.read_memory.


30
31
32
# File 'lib/nokogiri/html5/document.rb', line 30

# Read-only accessor for the document's URL.
#
# Returns whatever is currently stored in <tt>@url</tt>, which is +nil+
# right after construction.
def url
  @url
end

Class Method Details

.parse(string_or_io, url = nil, encoding = nil, **options) {|options| ... } ⇒ Object

:call-seq:

parse(input)
parse(input, url=nil, encoding=nil, **options)
parse(input, url=nil, encoding=nil) { |options| ... }

Parse HTML5 input.

Parameters
  • input may be a String, or any object that responds to read and close, such as an IO or StringIO.

  • url (optional) is a String indicating the canonical URI where this document is located.

  • encoding (optional) is the encoding that should be used when processing the document.

  • options (optional) is a configuration Hash (or keyword arguments) to set options during parsing. The three currently supported options are :max_errors, :max_tree_depth and :max_attributes, described at Nokogiri::HTML5.

    ⚠ Note that these options are different than those made available by Nokogiri::XML::Document and Nokogiri::HTML4::Document.

  • block (optional) is passed a configuration Hash on which parse options may be set. See Nokogiri::HTML5 for more information and usage.

Returns

Nokogiri::HTML5::Document

Yields:

  • (options)

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/nokogiri/html5/document.rb', line 61

# Parse HTML5 input into a document.
#
# string_or_io:: a String-like object (responds to +to_str+) or an IO-like
#                object (responds to +read+); +nil+ is treated as "".
# url::          optional location of the document; when omitted and the
#                input responds to both +read+ and +path+, the path is used.
# encoding::     optional encoding; when omitted, the name of the input's own
#                encoding is used unless that encoding is binary (ASCII-8BIT).
# options::      keyword options handed on to +do_parse+.
# block::        if given, it is called with the options Hash before anything
#                else so the caller can adjust the options in place.
#
# Returns the result of +do_parse+.
# Raises ArgumentError when the input is neither String-like nor IO-like.
def parse(string_or_io, url = nil, encoding = nil, **options, &block)
  block&.call(options)
  input = string_or_io || ""

  if input.respond_to?(:encoding)
    reported = input.encoding
    # Binary input carries no usable encoding hint, so leave +encoding+ alone.
    encoding ||= reported.name unless reported == Encoding::ASCII_8BIT
  end

  readable = input.respond_to?(:read)
  url ||= input.path if readable && input.respond_to?(:path)

  return do_parse(input, url, encoding, options) if readable || input.respond_to?(:to_str)

  raise ArgumentError, "not a string or IO object"
end

.read_io(io, url = nil, encoding = nil, **options) ⇒ Object

Create a new document from an IO object.

💡 Most users should prefer Document.parse to this method.

Raises:

  • (ArgumentError)

82
83
84
85
86
# File 'lib/nokogiri/html5/document.rb', line 82

# Create a new document from an IO-like object.
#
# io::       any object that responds to +read+.
# url::      optional document location, passed through untouched.
# encoding:: optional encoding, passed through untouched.
# options::  keyword options, passed to +do_parse+ as a Hash.
#
# Returns the result of +do_parse+.
# Raises ArgumentError when +io+ does not respond to +read+.
def read_io(io, url = nil, encoding = nil, **options)
  return do_parse(io, url, encoding, options) if io.respond_to?(:read)

  raise ArgumentError, "io object doesn't respond to :read"
end

.read_memory(string, url = nil, encoding = nil, **options) ⇒ Object

Create a new document from a String.

💡 Most users should prefer Document.parse to this method.

Raises:

  • (ArgumentError)

91
92
93
94
95
# File 'lib/nokogiri/html5/document.rb', line 91

# Create a new document from a String-like object.
#
# string::   any object that responds to +to_str+.
# url::      optional document location, passed through untouched.
# encoding:: optional encoding, passed through untouched.
# options::  keyword options, passed to +do_parse+ as a Hash.
#
# Returns the result of +do_parse+.
# Raises ArgumentError when +string+ does not respond to +to_str+.
def read_memory(string, url = nil, encoding = nil, **options)
  return do_parse(string, url, encoding, options) if string.respond_to?(:to_str)

  raise ArgumentError, "string object doesn't respond to :to_str"
end

Instance Method Details

#fragment(tags = nil) ⇒ Object

Parse an HTML5 document fragment from tags, returning an HTML5::DocumentFragment.


116
117
118
# File 'lib/nokogiri/html5/document.rb', line 116

# Build a DocumentFragment owned by this document.
#
# tags:: optional markup for the fragment; +nil+ when omitted.
#
# The document's current root is handed to the constructor as its third
# argument (presumably the parsing context - confirm against
# DocumentFragment#initialize).
#
# Returns the new DocumentFragment.
def fragment(tags = nil)
  root_node = root
  DocumentFragment.new(self, tags, root_node)
end

#to_xml(options = {}, &block) ⇒ Object

:nodoc:


120
121
122
123
124
# File 'lib/nokogiri/html5/document.rb', line 120

# Serialize using XML::Node's implementation of +to_xml+.
#
# XML::Document#to_xml is deliberately skipped: it does not add
# XML::Node::SaveOptions::AS_XML the way XML::Node#to_xml does, so the
# Node version is looked up explicitly and invoked on this document.
#
# options:: Hash forwarded to XML::Node#to_xml.
# block::   forwarded to XML::Node#to_xml.
def to_xml(options = {}, &block) # :nodoc:
  node_to_xml = XML::Node.instance_method(:to_xml)
  node_to_xml.bind(self).call(options, &block)
end

#xpath_doctype ⇒ Object

:call-seq:

xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
Returns

The document type which determines CSS-to-XPath translation.

See CSS::XPathVisitor for more information.


132
133
134
# File 'lib/nokogiri/html5/document.rb', line 132

# Report the document type used when translating CSS selectors to XPath.
#
# Always returns the HTML5 member of
# Nokogiri::CSS::XPathVisitor::DoctypeConfig.
def xpath_doctype
  doctype_config = Nokogiri::CSS::XPathVisitor::DoctypeConfig
  doctype_config::HTML5
end