Class: Nokogiri::HTML4::DocumentFragment

Inherits:
XML::DocumentFragment show all
Defined in:
lib/nokogiri/html4/document_fragment.rb

Direct Known Subclasses

Nokogiri::HTML5::DocumentFragment

Constant Summary

Constants inherited from XML::Node

XML::Node::ATTRIBUTE_DECL, XML::Node::ATTRIBUTE_NODE, XML::Node::CDATA_SECTION_NODE, XML::Node::COMMENT_NODE, XML::Node::DECONSTRUCT_KEYS, XML::Node::DECONSTRUCT_METHODS, XML::Node::DOCB_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE, XML::Node::DOCUMENT_NODE, XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE, XML::Node::ELEMENT_DECL, XML::Node::ELEMENT_NODE, XML::Node::ENTITY_DECL, XML::Node::ENTITY_NODE, XML::Node::ENTITY_REF_NODE, XML::Node::HTML_DOCUMENT_NODE, XML::Node::NAMESPACE_DECL, XML::Node::NOTATION_NODE, XML::Node::PI_NODE, XML::Node::TEXT_NODE, XML::Node::XINCLUDE_END, XML::Node::XINCLUDE_START

Constants included from ClassResolver

ClassResolver::VALID_NAMESPACES

Constants included from XML::Searchable

XML::Searchable::LOOKS_LIKE_XPATH

Constants included from XML::PP::Node

XML::PP::Node::COLLECTIONS

Instance Attribute Summary

Attributes inherited from XML::DocumentFragment

#parse_options

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from XML::DocumentFragment

#css, #deconstruct, #dup, #errors, #errors=, #fragment, #name, new, #search, #to_html, #to_s, #to_xhtml, #to_xml

Methods inherited from XML::Node

#<<, #<=>, #==, #[], #[]=, #accept, #add_child, #add_class, #add_namespace_definition, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #append_class, #attribute, #attribute_nodes, #attribute_with_ns, #attributes, #before, #blank?, #canonicalize, #cdata?, #child, #children, #children=, #classes, #clone, #comment?, #content, #content=, #create_external_subset, #create_internal_subset, #css_path, #data_ptr?, #deconstruct_keys, #decorate!, #default_namespace=, #description, #do_xinclude, #document, #document?, #dup, #each, #element?, #element_children, #encode_special_chars, #external_subset, #first_element_child, #fragment, #fragment?, #html?, #inner_html, #inner_html=, #internal_subset, #key?, #keys, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #lang, #lang=, #last_element_child, #line, #line=, #matches?, #namespace, #namespace=, #namespace_definitions, #namespace_scopes, #namespaced_key?, #namespaces, #native_content=, new, #next_element, #next_sibling, #node_name, #node_name=, #node_type, #parent, #parent=, #parse, #path, #pointer_id, #prepend_child, #previous_element, #previous_sibling, #processing_instruction?, #read_only?, #remove_attribute, #remove_class, #replace, #serialize, #swap, #text?, #to_html, #to_s, #to_xhtml, #to_xml, #traverse, #unlink, #value?, #values, #wrap, #write_html_to, #write_to, #write_xhtml_to, #write_xml_to, #xml?

Methods included from ClassResolver

#related_class

Methods included from XML::Searchable

#>, #at, #at_css, #at_xpath, #css, #search, #xpath

Methods included from XML::PP::Node

#inspect, #pretty_print

Methods included from Nokogiri::HTML5::Node

#fragment, #inner_html, #write_to

Constructor Details

#initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) {|options| ... } ⇒ DocumentFragment

It’s recommended to use either DocumentFragment.parse or XML::Node#parse rather than calling this method directly.

Yields:

  • (options)


90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/nokogiri/html4/document_fragment.rb', line 90

# Populate a new fragment by parsing +tags+.
#
# Prefer DocumentFragment.parse or XML::Node#parse over calling this directly.
#
# document:: document whose encoding is used for the temporary parse and,
#            when +ctx+ is given, whose error list is consulted
# tags::     markup to parse; when nil or false, nothing is parsed
# ctx::      optional context node; when given, +tags+ is parsed through ctx.parse
# options::  parse options; an Integer is wrapped in Nokogiri::XML::ParseOptions
#
# Yields the parse options object (if a block is given) before any parsing happens.
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) # rubocop:disable Lint/MissingSuper
  return self unless tags

  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
  @parse_options = options
  yield(options) if block_given?

  if ctx
    # Snapshot the document's errors first so that only the ones added by
    # this parse end up on the fragment.
    errors_before = document.errors.dup
    parsed = ctx.parse("<div>#{tags}</div>", options)
    unless parsed.empty?
      # The input was wrapped in a <div>; adopt that wrapper's children.
      parsed.first.children.each { |node| node.parent = self }
    end
    self.errors = document.errors - errors_before
  else
    # Pick which nodes to pull out of the temporary document: the body
    # element itself when some line of the input begins with "<body"
    # (case-insensitive, leading whitespace allowed), otherwise the nodes
    # directly under the body.
    xpath_query = /^\s*?<body/i.match?(tags) ? "/html/body" : "/html/body/node()"

    scratch_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
    scratch_doc.xpath(xpath_query).each { |node| node.parent = self }
    self.errors = scratch_doc.errors
  end
  children
end

Class Method Details

.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) ⇒ Object

:call-seq:

parse(tags) => DocumentFragment
parse(tags, encoding) => DocumentFragment
parse(tags, encoding, options) => DocumentFragment
parse(tags, encoding) { |options| ... } => DocumentFragment

Parse an HTML4 fragment.

Parameters
  • tags (optional String, or any object that responds to #read such as an IO, or StringIO)

  • encoding (optional String) the name of the encoding that should be used when processing the document. (default nil for auto-detection)

  • options (optional) configuration object that sets options during parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See Nokogiri::XML::ParseOptions for more information.

Yields

If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
Returns

DocumentFragment

Example: Parsing a string

fragment = DocumentFragment.parse("<div>Hello World</div>")

Example: Parsing an IO

fragment = File.open("fragment.html") do |file|
  DocumentFragment.parse(file)
end

Example: Specifying encoding

fragment = DocumentFragment.parse(input, "EUC-JP")

Example: Setting parse options dynamically

DocumentFragment.parse("<div>Hello World") do |options|
  options.huge.pedantic
end


49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/nokogiri/html4/document_fragment.rb', line 49

# Parse an HTML4 fragment from +tags+ into a new fragment attached to a fresh
# HTML4::Document.
#
# tags::     a String, or an IO-like object responding to #read
# encoding:: optional encoding name; when absent it is taken from +tags+
#            (falling back to "UTF-8")
# options::  parse options forwarded to the constructor
# block::    forwarded to the constructor
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
  doc = HTML4::Document.new

  if tags.respond_to?(:read)
    # IO-like input (IO, File, StringIO, ...). #read takes no encoding
    # argument, so a requested encoding is applied through #set_encoding,
    # when the object offers it, before the content is read. The string that
    # comes back then carries either that encoding or the object's own.
    #
    # For StringIO specifically, #set_encoding affects only the internal
    # string, not how the data is read out.
    if encoding && tags.respond_to?(:set_encoding)
      tags.set_encoding(encoding)
    end
    tags = tags.read
  end

  unless encoding
    encoding =
      if tags.respond_to?(:encoding)
        detected = tags.encoding
        # Binary (ASCII-8BIT) input is treated as UTF-8.
        detected == ::Encoding::ASCII_8BIT ? "UTF-8" : detected.name
      else
        "UTF-8"
      end
  end

  doc.encoding = encoding

  new(doc, tags, nil, options, &block)
end