Class: Nokogiri::HTML4::DocumentFragment
- Inherits:
-
XML::DocumentFragment
- Object
- XML::Node
- XML::DocumentFragment
- Nokogiri::HTML4::DocumentFragment
- Defined in:
- lib/nokogiri/html4/document_fragment.rb
Direct Known Subclasses
Constant Summary
Constants inherited from XML::Node
XML::Node::ATTRIBUTE_DECL, XML::Node::ATTRIBUTE_NODE, XML::Node::CDATA_SECTION_NODE, XML::Node::COMMENT_NODE, XML::Node::DECONSTRUCT_KEYS, XML::Node::DECONSTRUCT_METHODS, XML::Node::DOCB_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE, XML::Node::DOCUMENT_NODE, XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE, XML::Node::ELEMENT_DECL, XML::Node::ELEMENT_NODE, XML::Node::ENTITY_DECL, XML::Node::ENTITY_NODE, XML::Node::ENTITY_REF_NODE, XML::Node::HTML_DOCUMENT_NODE, XML::Node::NAMESPACE_DECL, XML::Node::NOTATION_NODE, XML::Node::PI_NODE, XML::Node::TEXT_NODE, XML::Node::XINCLUDE_END, XML::Node::XINCLUDE_START
Constants included from ClassResolver
ClassResolver::VALID_NAMESPACES
Constants included from XML::Searchable
XML::Searchable::LOOKS_LIKE_XPATH
Constants included from XML::PP::Node
Instance Attribute Summary
Attributes inherited from XML::DocumentFragment
Class Method Summary collapse
-
.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) ⇒ Object
:call-seq: parse(tags) => DocumentFragment parse(tags, encoding) => DocumentFragment parse(tags, encoding, options) => DocumentFragment parse(tags, encoding) { |options| … } => DocumentFragment.
Instance Method Summary collapse
-
#initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) {|options| ... } ⇒ DocumentFragment
constructor
It’s recommended to use either DocumentFragment.parse or XML::Node#parse rather than call this method directly.
Methods inherited from XML::DocumentFragment
#css, #deconstruct, #dup, #errors, #errors=, #fragment, #name, new, #search, #to_html, #to_s, #to_xhtml, #to_xml
Methods inherited from XML::Node
#<<, #<=>, #==, #[], #[]=, #accept, #add_child, #add_class, #add_namespace_definition, #add_next_sibling, #add_previous_sibling, #after, #ancestors, #append_class, #attribute, #attribute_nodes, #attribute_with_ns, #attributes, #before, #blank?, #canonicalize, #cdata?, #child, #children, #children=, #classes, #clone, #comment?, #content, #content=, #create_external_subset, #create_internal_subset, #css_path, #data_ptr?, #deconstruct_keys, #decorate!, #default_namespace=, #description, #do_xinclude, #document, #document?, #dup, #each, #element?, #element_children, #encode_special_chars, #external_subset, #first_element_child, #fragment, #fragment?, #html?, #inner_html, #inner_html=, #internal_subset, #key?, #keys, #kwattr_add, #kwattr_append, #kwattr_remove, #kwattr_values, #lang, #lang=, #last_element_child, #line, #line=, #matches?, #namespace, #namespace=, #namespace_definitions, #namespace_scopes, #namespaced_key?, #namespaces, #native_content=, new, #next_element, #next_sibling, #node_name, #node_name=, #node_type, #parent, #parent=, #parse, #path, #pointer_id, #prepend_child, #previous_element, #previous_sibling, #processing_instruction?, #read_only?, #remove_attribute, #remove_class, #replace, #serialize, #swap, #text?, #to_html, #to_s, #to_xhtml, #to_xml, #traverse, #unlink, #value?, #values, #wrap, #write_html_to, #write_to, #write_xhtml_to, #write_xml_to, #xml?
Methods included from ClassResolver
Methods included from XML::Searchable
#>, #at, #at_css, #at_xpath, #css, #search, #xpath
Methods included from XML::PP::Node
Methods included from Nokogiri::HTML5::Node
#fragment, #inner_html, #write_to
Constructor Details
#initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) {|options| ... } ⇒ DocumentFragment
It’s recommended to use either DocumentFragment.parse or XML::Node#parse rather than call this method directly.
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
# File 'lib/nokogiri/html4/document_fragment.rb', line 90

# Populate this fragment by parsing +tags+ as HTML4 markup.
#
# It's recommended to use either DocumentFragment.parse or XML::Node#parse
# rather than call this method directly.
#
# document:: the owning document; its +errors+ (context branch) and
#            +encoding+ (no-context branch) are read here.
# tags::     markup to parse; when nil, nothing is parsed and the method
#            returns immediately.
# ctx::      optional context node; when given, +tags+ is parsed through
#            +ctx.parse+ wrapped in a <div>.
# options::  either an Integer (wrapped in Nokogiri::XML::ParseOptions) or an
#            options object that is used as given.
#
# If a block is given, it is yielded the options object before parsing.
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) # rubocop:disable Lint/MissingSuper
  return self unless tags

  # Accept a raw Integer bitmask as well as a ParseOptions object.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  @parse_options = options
  yield options if block_given?

  if ctx
    # Snapshot the document's errors so that only the ones added by this
    # parse are recorded on the fragment.
    preexisting_errors = document.errors.dup
    node_set = ctx.parse("<div>#{tags}</div>", options)
    # Re-parent the wrapper <div>'s children onto this fragment.
    node_set.first.children.each { |child| child.parent = self } unless node_set.empty?
    self.errors = document.errors - preexisting_errors
  else
    # This is a horrible hack, but I don't care
    # When the markup starts with <body>, take the body element itself;
    # otherwise take the body's child nodes.
    path = if /^\s*?<body/i.match?(tags)
      "/html/body"
    else
      "/html/body/node()"
    end

    temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding, options)
    temp_doc.xpath(path).each { |child| child.parent = self }
    self.errors = temp_doc.errors
  end
  children
end
Class Method Details
.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) ⇒ Object
:call-seq:
parse(tags) => DocumentFragment
parse(tags, encoding) => DocumentFragment
parse(tags, encoding, options) => DocumentFragment
parse(tags, encoding) { |options| ... } => DocumentFragment
Parse an HTML4 fragment.
- Parameters
-
tags
(optional String, or any object that responds to #read
such as an IO, or StringIO) -
encoding
(optional String) the name of the encoding that should be used when processing the document. (default nil
for auto-detection) -
options
(optional) configuration object that sets options during parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See Nokogiri::XML::ParseOptions for more information.
- Yields
-
If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify
before the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
- Returns
-
DocumentFragment
Example: Parsing a string
fragment = DocumentFragment.parse("<div>Hello World</div>")
Example: Parsing an IO
fragment = File.open("fragment.html") do |file|
DocumentFragment.parse(file)
end
Example: Specifying encoding
fragment = DocumentFragment.parse(input, "EUC-JP")
Example: Setting parse options dynamically
DocumentFragment.parse("<div>Hello World") do |options|
options.huge.pedantic
end
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/nokogiri/html4/document_fragment.rb', line 49

# Parse an HTML4 fragment and return a new DocumentFragment.
#
# tags::     a String, or any object that responds to +read+ (its content is
#            read and used as the markup).
# encoding:: optional encoding name; when nil it is taken from the markup's
#            own encoding, with "UTF-8" used for binary (ASCII-8BIT) input or
#            when the markup does not respond to +encoding+.
# options::  parse options, forwarded unchanged to +new+.
# block::    forwarded unchanged to +new+.
def self.parse(tags, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
  doc = HTML4::Document.new

  if tags.respond_to?(:read)
    # Handle IO-like objects (IO, File, StringIO, etc.)
    # The _read_ method of these objects doesn't accept an +encoding+ parameter.
    # Encoding is usually set when the IO object is created or opened,
    # or by using the _set_encoding_ method.
    #
    # 1. If +encoding+ is provided and the object supports _set_encoding_,
    #    set the encoding before reading.
    # 2. Read the content from the IO-like object.
    #
    # Note: After reading, the content's encoding will be:
    # - The encoding set by _set_encoding_ if it was called
    # - The default encoding of the IO object otherwise
    #
    # For StringIO specifically, _set_encoding_ affects only the internal string,
    # not how the data is read out.
    tags.set_encoding(encoding) if encoding && tags.respond_to?(:set_encoding)
    tags = tags.read
  end

  # Only reached when no encoding was supplied by the caller.
  encoding ||= if tags.respond_to?(:encoding)
    encoding = tags.encoding
    if encoding == ::Encoding::ASCII_8BIT
      "UTF-8"
    else
      encoding.name
    end
  else
    "UTF-8"
  end

  doc.encoding = encoding

  new(doc, tags, nil, options, &block)
end