Module: Hpricot::Traverse
- Included in:
- Container::Trav, Leaf::Trav
- Defined in:
- lib/hpricot/traverse.rb,
lib/hpricot/modules.rb,
lib/hpricot/elements.rb,
lib/hpricot/traverse.rb
Overview
:startdoc:
Class Method Summary collapse
Instance Method Summary collapse
-
#after(html = nil, &blk) ⇒ Object
Adds elements immediately after this element, contained in the
html
string. -
#at(expr) ⇒ Object
(also: #%)
Find the first matching node for the CSS or XPath
expr
string. -
#before(html = nil, &blk) ⇒ Object
Adds elements immediately before this element, contained in the
html
string. -
#bogusetag? ⇒ Boolean
Is this object a stranded end tag?.
-
#children_of_type(tag_name) ⇒ Object
Find children of a given
tag_name
. - #clean_path(path) ⇒ Object
-
#comment? ⇒ Boolean
Is this object a comment?.
-
#css_path ⇒ Object
Builds a unique CSS string for this node, from the root of the document containing it.
-
#doc? ⇒ Boolean
Is this object the enclosing HTML or XML document?.
-
#doctype? ⇒ Boolean
Is this object a doctype tag?.
-
#elem? ⇒ Boolean
Is this object an HTML or XML element?.
-
#following ⇒ Object
Find all nodes which follow the current one.
- #get_subnode(*indexes) ⇒ Object
-
#html(inner = nil, &blk) ⇒ Object
(also: #inner_html)
Builds an HTML string from the contents of this node.
- #index(name) ⇒ Object
-
#inner_html=(inner) ⇒ Object
(also: #innerHTML=)
Inserts new contents into the current node, based on the HTML contained in string
inner
. -
#inner_text ⇒ Object
(also: #innerText)
Builds a string from the text contained in this node.
-
#make(input = nil, &blk) ⇒ Object
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
-
#next ⇒ Object
(also: #next_node)
Returns the node neighboring this node to the south: just below it.
- #node_position ⇒ Object
-
#nodes_at(*pos) ⇒ Object
Puts together an array of neighboring nodes based on their proximity to this node.
- #position ⇒ Object
-
#preceding ⇒ Object
Find all preceding nodes.
-
#previous ⇒ Object
(also: #previous_node)
Returns the node neighboring this node to the north: just above it.
-
#procins? ⇒ Boolean
Is this object an XML processing instruction?.
-
#search(expr, &blk) ⇒ Object
(also: #/)
Searches this node for all elements matching the CSS or XPath
expr
. -
#swap(html = nil, &blk) ⇒ Object
Replace this element and its contents with the nodes contained in the
html
string. -
#text? ⇒ Boolean
Is this object an HTML text node?.
-
#to_html ⇒ Object
(also: #to_s)
Builds an HTML string from this node and its contents.
-
#to_original_html ⇒ Object
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
-
#to_plain_text ⇒ Object
Builds a string from the text contained in this node.
-
#traverse_element(*names, &block) ⇒ Object
traverse_element
traverses elements in the tree. -
#traverse_text(&block) ⇒ Object
traverse_text
traverses texts in the tree. -
#xmldecl? ⇒ Boolean
Is this object an XML declaration?.
-
#xpath ⇒ Object
Builds a unique XPath string for this node, from the root of the document containing it.
Class Method Details
Instance Method Details
#after(html = nil, &blk) ⇒ Object
Adds elements immediately after this element, contained in the html
string.
121 122 123 |
# File 'lib/hpricot/traverse.rb', line 121
# Inserts new nodes directly after this node in its parent.
# The nodes are produced by +make+ from the +html+ string and/or the block.
def after(html = nil, &blk)
  container = parent
  new_nodes = make(html, &blk)
  container.insert_after(new_nodes, self)
end
#at(expr) ⇒ Object Also known as: %
Find the first matching node for the CSS or XPath expr
string.
341 342 343 |
# File 'lib/hpricot/traverse.rb', line 341
# Returns the first node that +search+ yields for the CSS or XPath +expr+
# (whatever +first+ returns when nothing matched).
def at(expr)
  matches = search(expr)
  matches.first
end
#before(html = nil, &blk) ⇒ Object
Adds elements immediately before this element, contained in the html
string.
126 127 128 |
# File 'lib/hpricot/traverse.rb', line 126
# Inserts new nodes directly before this node in its parent.
# The nodes are produced by +make+ from the +html+ string and/or the block.
def before(html = nil, &blk)
  container = parent
  new_nodes = make(html, &blk)
  container.insert_before(new_nodes, self)
end
#bogusetag? ⇒ Boolean
Is this object a stranded end tag?
21 |
# File 'lib/hpricot/traverse.rb', line 21
# True when this object matches BogusETag::Trav (a stranded end tag).
def bogusetag?
  BogusETag::Trav === self
end
#children_of_type(tag_name) ⇒ Object
Find children of a given tag_name
.
ele.children_of_type('p')
#=> [...array of paragraphs...]
390 391 392 393 394 395 396 |
# File 'lib/hpricot/traverse.rb', line 390
# Returns the children whose +pathname+ equals +tag_name+.
# Children that do not respond to +pathname+ are skipped.
# Returns nil when this object has no +children+ method at all.
#
#   ele.children_of_type('p')
#     #=> [...array of paragraphs...]
def children_of_type(tag_name)
  return unless respond_to?(:children)

  children.find_all do |child|
    child.respond_to?(:pathname) && child.pathname == tag_name
  end
end
#clean_path(path) ⇒ Object
203 204 205 |
# File 'lib/hpricot/traverse.rb', line 203
# Returns a copy of +path+ with whitespace removed at line starts and
# line ends (the pattern uses the per-line anchors ^ and $).
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end
#comment? ⇒ Boolean
Is this object a comment?
19 |
# File 'lib/hpricot/traverse.rb', line 19
# True when this object matches Comment::Trav (a comment node).
def comment?
  Comment::Trav === self
end
#css_path ⇒ Object
Builds a unique CSS string for this node, from the root of the document containing it.
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# File 'lib/hpricot/traverse.rb', line 226
# Builds a CSS selector string for this node.
#
# An element carrying an +id+ attribute is addressed as "#<id>". Any other
# node is addressed as "<parent css_path> > <pathname>" (or just the
# pathname when the parent's css_path is nil), with ":nth(k)" appended when
# at least two siblings share this node's pathname. k is the zero-based
# position of this node among those same-named siblings.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  same_name_count = 0
  own_rank = 0
  siblings = parent.children
  if siblings
    siblings.each do |sibling|
      # Record the rank before counting self, so it stays zero-based.
      own_rank = same_name_count if sibling == self
      same_name_count += 1 if sibling.pathname == pathname
    end
  end

  parent_path = parent.css_path
  path = parent_path ? "#{parent_path} > #{pathname}" : pathname
  path += ":nth(#{own_rank})" if same_name_count >= 2
  path
end
#doc? ⇒ Boolean
Is this object the enclosing HTML or XML document?
7 |
# File 'lib/hpricot/traverse.rb', line 7
# True when this object matches Doc::Trav (the enclosing document).
def doc?
  Doc::Trav === self
end
#doctype? ⇒ Boolean
Is this object a doctype tag?
15 |
# File 'lib/hpricot/traverse.rb', line 15
# True when this object matches DocType::Trav (a doctype tag).
def doctype?
  DocType::Trav === self
end
#elem? ⇒ Boolean
Is this object an HTML or XML element?
9 |
# File 'lib/hpricot/traverse.rb', line 9
# True when this object matches Elem::Trav (an HTML or XML element).
def elem?
  Elem::Trav === self
end
#following ⇒ Object
Find all nodes which follow the current one.
114 115 116 117 118 |
# File 'lib/hpricot/traverse.rb', line 114
# Collects every sibling positioned after this node in the parent's
# children and wraps them in an Elements.
def following
  siblings = parent.children
  first_after = siblings.index(self) + 1
  Elements[*siblings[first_after..-1]]
end
#get_subnode(*indexes) ⇒ Object
138 139 140 141 142 143 144 |
# File 'lib/hpricot/traverse.rb', line 138
# Walks down from this node, applying +get_subnode_internal+ once per
# entry in +indexes+, and returns the node reached. With no indexes the
# receiver itself is returned.
def get_subnode(*indexes)
  indexes.inject(self) do |node, index|
    node.get_subnode_internal(index)
  end
end
#html(inner = nil, &blk) ⇒ Object Also known as: inner_html
Builds an HTML string from the contents of this node.
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/hpricot/traverse.rb', line 168
# Reader and writer for the contents of this node.
#
# Called without +inner+ and without a block, it returns the concatenated
# output of every child, or "" when there are no children to render.
# Otherwise the node is marked altered and its children are replaced:
# an Array is used as-is, anything else goes through +make+. The new
# children are then passed to +reparent+.
def html(inner = nil, &blk)
  unless inner || blk
    return "" unless respond_to?(:children) && children
    return children.map { |child| child.output("") }.join
  end

  altered!
  self.children = Array === inner ? inner : make(inner, &blk)
  reparent self.children
end
#index(name) ⇒ Object
47 48 49 50 51 52 53 54 55 56 |
# File 'lib/hpricot/traverse.rb', line 47
# Returns the position of the first child matching +name+.
#
# "*" always yields 0. Otherwise a child matches when its +name+ equals
# +name+, or when it is a text node and +name+ is "text()". Returns -1
# when nothing matches or there are no children.
def index(name)
  return 0 if name == "*"

  kids = children
  if kids
    kids.each_with_index do |child, offset|
      name_hit = child.respond_to?(:name) && name == child.name
      return offset if name_hit || (child.text? && name == "text()")
    end
  end
  -1
end
#inner_html=(inner) ⇒ Object Also known as: innerHTML=
Inserts new contents into the current node, based on the HTML contained in string inner
.
191 192 193 |
# File 'lib/hpricot/traverse.rb', line 191
# Replaces the contents of this node via +html+. A nil or false +inner+
# is passed on as an empty array.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end
#inner_text ⇒ Object Also known as: innerText
Builds a string from the text contained in this node. All HTML elements are removed.
158 159 160 161 162 163 164 |
# File 'lib/hpricot/traverse.rb', line 158
# Concatenates the +inner_text+ of every child. Returns "" when this
# object has no children to visit.
def inner_text
  return "" unless respond_to?(:children) && children

  children.map(&:inner_text).join
end
#make(input = nil, &blk) ⇒ Object
Parses an HTML string, making an HTML fragment based on the options used to create the container document.
25 26 27 28 29 30 31 |
# File 'lib/hpricot/traverse.rb', line 25
# Builds nodes from +input+ and/or the block. The request is delegated to
# the parent when there is one that responds to +make+; otherwise
# Hpricot.make is used and its +children+ are returned.
def make(input = nil, &blk)
  owner = parent
  return owner.make(input, &blk) if owner && owner.respond_to?(:make)

  Hpricot.make(input, &blk).children
end
#next ⇒ Object Also known as: next_node
Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.
91 92 93 94 |
# File 'lib/hpricot/traverse.rb', line 91
# Returns the sibling placed directly after this node in the parent's
# children (whatever kind of node it is), or nil when this is the last
# child. Also returns nil for a node without a parent.
def next
  # The guard has to run before parent.children is touched; checking it
  # afterwards would never be reached for a parentless node.
  return unless parent

  siblings = parent.children
  siblings[siblings.index(self) + 1]
end
#node_position ⇒ Object
242 243 244 |
# File 'lib/hpricot/traverse.rb', line 242
# Position of this node within its parent's children.
def node_position
  siblings = parent.children
  siblings.index(self)
end
#nodes_at(*pos) ⇒ Object
Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).
This method also accepts ranges and sets of numbers.
ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
ele.nodes_at(0, 5..6) # the current node and two others
67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/hpricot/traverse.rb', line 67
# Collects siblings by their offset relative to this node and returns them
# as an Elements, in document order.
#
# Each entry of +pos+ is matched with === against (sibling index - own
# index), so integers and ranges both work; offset 0 is this node itself.
# A Range whose begin is a String is first translated into numeric offsets
# through parent.index.
#
#   ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
#   ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
#   ele.nodes_at(0, 5..6) # the current node and two others
def nodes_at(*pos)
  siblings = parent.children
  own_index = siblings.index(self)

  offsets = pos.map do |r|
    if r.is_a?(Range) && r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - own_index,
                parent.index(r.end) - own_index,
                r.exclude_end?)
    else
      r
    end
  end

  # No debug output here: a stray `p pos` used to print on every call.
  picked = siblings.select.with_index do |_node, i|
    offsets.any? { |offset| offset === i - own_index }
  end
  Elements[*picked]
end
#position ⇒ Object
246 247 248 |
# File 'lib/hpricot/traverse.rb', line 246
# Position of this node among the parent's children that share its
# pathname (as returned by +children_of_type+).
def position
  same_kind = parent.children_of_type(self.pathname)
  same_kind.index(self)
end
#preceding ⇒ Object
Find all preceding nodes.
107 108 109 110 111 |
# File 'lib/hpricot/traverse.rb', line 107
# Collects every sibling positioned before this node in the parent's
# children and wraps them in an Elements.
def preceding
  siblings = parent.children
  own_index = siblings.index(self)
  Elements[*siblings.first(own_index)]
end
#previous ⇒ Object Also known as: previous_node
Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.
99 100 101 102 103 |
# File 'lib/hpricot/traverse.rb', line 99
# Returns the sibling placed directly before this node in the parent's
# children (whatever kind of node it is), or nil when this is the first
# child. Also returns nil when there is no parent or the parent has no
# children list.
def previous
  # Guard before indexing: a nil check placed after `index` was already
  # called can never take effect.
  siblings = parent && parent.children
  return unless siblings

  before = siblings.index(self) - 1
  # A negative index would wrap around to the last sibling.
  siblings[before] if before >= 0
end
#procins? ⇒ Boolean
Is this object an XML processing instruction?
17 |
# File 'lib/hpricot/traverse.rb', line 17
# True when this object matches ProcIns::Trav (an XML processing instruction).
def procins?
  ProcIns::Trav === self
end
#search(expr, &blk) ⇒ Object Also known as: /
Searches this node for all elements matching the CSS or XPath expr
. Returns an Elements array containing the matching nodes. If blk
is given, it is used to iterate through the matching set.
254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 |
# File 'lib/hpricot/traverse.rb', line 254 def search(expr, &blk) if Range === expr return Elements.(at(expr.begin), at(expr.end), expr.exclude_end?) end last = nil nodes = [self] done = [] expr = expr.to_s hist = [] until expr.empty? expr = clean_path(expr) expr.gsub!(%r!^//!, '') case expr when %r!^/?\.\.! last = expr = $' nodes.map! { |node| node.parent } when %r!^[>/]\s*! last = expr = $' nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact] when %r!^\+! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[siblings.index(node)+1] end nodes.compact! when %r!^~! last = expr = $' nodes.map! do |node| siblings = node.parent.children siblings[(siblings.index(node)+1)..-1] end nodes.flatten! when %r!^[|,]! last = expr = " #$'" nodes.shift if nodes.first == self done += nodes nodes = [self] else m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a after = $' mt = after[%r!:[a-z0-9\\*_-]+!i, 0] oop = false if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]") after = $' m[2] += mt expr = after end if m[1] == '#' oid = get_element_by_id(m[2]) nodes = oid ? [oid] : [] expr = after else m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "." ret = [] nodes.each do |node| case m[2] when '*' node.traverse_element { |n| ret << n } else if node.respond_to? :get_elements_by_tag_name ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)] end end end nodes = ret end last = nil end hist << expr break if hist[-1] == hist[-2] nodes, expr = Elements.filter(nodes, expr) end nodes = done + nodes.flatten.uniq if blk nodes.each(&blk) self else Elements[*nodes] end end |
#swap(html = nil, &blk) ⇒ Object
Replace this element and its contents with the nodes contained in the html
string.
133 134 135 136 |
# File 'lib/hpricot/traverse.rb', line 133
# Replaces this node, within its parent, by the nodes that +make+ builds
# from the +html+ string and/or the block. The parent is marked altered
# first.
def swap(html = nil, &blk)
  container = parent
  container.altered!
  replacement = make(html, &blk)
  container.replace_child(self, replacement)
end
#text? ⇒ Boolean
Is this object an HTML text node?
11 |
# File 'lib/hpricot/traverse.rb', line 11
# True when this object matches Text::Trav (an HTML text node).
def text?
  Text::Trav === self
end
#to_html ⇒ Object Also known as: to_s
Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io)
as a method on this object.
36 37 38 |
# File 'lib/hpricot/traverse.rb', line 36
# Renders this node by calling +output+ with a fresh empty string and
# returning what +output+ returns.
def to_html
  buffer = ""
  output(buffer)
end
#to_original_html ⇒ Object
Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.
43 44 45 |
# File 'lib/hpricot/traverse.rb', line 43
# Renders this node through +output+ with the :preserve option enabled,
# starting from a fresh empty string.
def to_original_html
  buffer = ""
  output(buffer, :preserve => true)
end
#to_plain_text ⇒ Object
Builds a string from the text contained in this node. All HTML elements are removed.
148 149 150 151 152 153 154 |
# File 'lib/hpricot/traverse.rb', line 148
# Joins the +to_plain_text+ of every child, strips the ends, and squeezes
# any run of two or more newlines down to exactly two. Returns "" when
# this object has no children to visit.
def to_plain_text
  return "" unless respond_to?(:children) && children

  joined = children.map(&:to_plain_text).join
  joined.strip.gsub(/\n{2,}/, "\n\n")
end
#traverse_element(*names, &block) ⇒ Object
traverse_element
traverses elements in the tree. It yields elements in depth first order.
If names are empty, it yields all elements. If non-empty names are given, it should be a list of universal names.
A nested element is yielded in depth first order as follows.
t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>')
t.traverse_element("a", "c") {|e| p e}
# =>
{elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
{emptyelem <a id="1">}
{emptyelem <c id="2">}
Universal names are specified as follows.
t = Hpricot(<<'End')
<html>
<meta name="robots" content="index,nofollow">
<meta name="author" content="Who am I?">
</html>
End
t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# =>
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}
374 375 376 377 378 379 380 381 382 383 |
# File 'lib/hpricot/traverse.rb', line 374
# Yields elements of the tree to the block and returns nil.
#
# Without +names+ every element is visited (via +traverse_all_element+).
# With +names+, they are turned into a lookup hash and handed to
# +traverse_some_element+.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    wanted = names.each_with_object({}) { |name, set| set[name] = true }
    traverse_some_element(wanted, &block)
  end
  nil
end
#traverse_text(&block) ⇒ Object
traverse_text
traverses texts in the tree
680 681 682 683 |
# File 'lib/hpricot/traverse.rb', line 680
# Hands the block to +traverse_text_internal+, which does the actual walk
# over the texts in the tree. Always returns nil, regardless of what the
# internal traversal returns.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)

  nil
end
#xmldecl? ⇒ Boolean
Is this object an XML declaration?
13 |
# File 'lib/hpricot/traverse.rb', line 13
# True when this object matches XMLDecl::Trav (an XML declaration).
def xmldecl?
  XMLDecl::Trav === self
end
#xpath ⇒ Object
Builds a unique XPath string for this node, from the root of the document containing it.
209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
# File 'lib/hpricot/traverse.rb', line 209
# Builds an XPath string for this node.
#
# An element carrying an +id+ attribute is addressed as
# "//<name>[@id='<id>']". Any other node is addressed as the parent's
# xpath joined with this node's pathname, with "[k]" appended when at
# least two siblings share that pathname. k is the one-based position of
# this node among those same-named siblings.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  same_name_count = 0
  own_rank = 0
  siblings = parent.children
  if siblings
    siblings.each do |sibling|
      # Record the rank before counting self; it is shifted to one-based below.
      own_rank = same_name_count if sibling == self
      same_name_count += 1 if sibling.pathname == pathname
    end
  end

  # File.join is used purely as a "/" joiner for the path segments.
  path = File.join(parent.xpath, pathname)
  path += "[#{own_rank + 1}]" if same_name_count >= 2
  path
end