Module: Hpricot::Traverse

Included in:
Container::Trav, Leaf::Trav
Defined in:
lib/hpricot/traverse.rb,
lib/hpricot/modules.rb,
lib/hpricot/elements.rb,
lib/hpricot/traverse.rb

Overview

:startdoc:

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.filter(tok, &blk) ⇒ Object



371
372
373
# File 'lib/hpricot/elements.rb', line 371

def self.filter(tok, &blk)
  define_method("filter[#{tok.is_a?(String) ? tok : tok.inspect}]", &blk)
end

Instance Method Details

#after(html = nil, &blk) ⇒ Object

Adds elements immediately after this element, contained in the html string.



121
122
123
# File 'lib/hpricot/traverse.rb', line 121

# Builds nodes from +html+ (or from the block) with #make and asks the
# parent to insert them right after this node.
#
# Returns the result of the parent's +insert_after+.
def after(html = nil, &blk)
  new_nodes = make(html, &blk)
  parent.insert_after(new_nodes, self)
end

#at(expr) ⇒ Object Also known as: %

Find the first matching node for the CSS or XPath expr string.



341
342
343
# File 'lib/hpricot/traverse.rb', line 341

# Returns the first node matched by +expr+ (passed straight to #search),
# i.e. whatever +first+ yields on the search result.
def at(expr)
  matches = search(expr)
  matches.first
end

#before(html = nil, &blk) ⇒ Object

Adds elements immediately before this element, contained in the html string.



126
127
128
# File 'lib/hpricot/traverse.rb', line 126

# Builds nodes from +html+ (or from the block) with #make and asks the
# parent to insert them right before this node.
#
# Returns the result of the parent's +insert_before+.
def before(html = nil, &blk)
  new_nodes = make(html, &blk)
  parent.insert_before(new_nodes, self)
end

#bogusetag? ⇒ Boolean

Is this object a stranded end tag?

Returns:

  • (Boolean)


21
# File 'lib/hpricot/traverse.rb', line 21

# True when this object includes BogusETag::Trav.
def bogusetag?
  is_a?(BogusETag::Trav)
end

#children_of_type(tag_name) ⇒ Object

Find children of a given tag_name.

ele.children_of_type('p')
  #=> [...array of paragraphs...]


390
391
392
393
394
395
396
# File 'lib/hpricot/traverse.rb', line 390

# Collects the direct children whose +pathname+ equals +tag_name+.
#
# Children that do not respond to +pathname+ are skipped.
#
# Returns an Array of matching children, or nil when this node has no
# +children+ method or its +children+ is nil (same guard as #inner_text
# and #html, so a childless container no longer raises).
def children_of_type(tag_name)
  return unless respond_to?(:children) && children
  children.select do |child|
    child.respond_to?(:pathname) && child.pathname == tag_name
  end
end

#clean_path(path) ⇒ Object



203
204
205
# File 'lib/hpricot/traverse.rb', line 203

# Returns a copy of +path+ with whitespace removed at the start and end of
# every line (the pattern uses the line anchors ^ and $, not \A and \z).
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end

#comment? ⇒ Boolean

Is this object a comment?

Returns:

  • (Boolean)


19
# File 'lib/hpricot/traverse.rb', line 19

# True when this object includes Comment::Trav.
def comment?
  is_a?(Comment::Trav)
end

#css_path ⇒ Object

Builds a unique CSS string for this node, from the root of the document containing it.



226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/hpricot/traverse.rb', line 226

# Builds a CSS selector string for this node.
#
# An element carrying an +id+ attribute short-circuits to "#<id>".
# Otherwise the parent's css_path is extended with " > <pathname>", and
# ":nth(<n>)" is appended when two or more siblings share this node's
# pathname; <n> is the zero-based rank of this node among them.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  same_tag = 0 # siblings seen so far with the same pathname
  own_rank = 0 # value of same_tag at the moment this node was reached
  siblings = parent.children
  if siblings
    siblings.each do |sibling|
      own_rank = same_tag if sibling == self
      same_tag += 1 if sibling.pathname == self.pathname
    end
  end
  base = parent.css_path
  # A parent without a path of its own contributes nothing.
  path = base ? "#{base} > #{self.pathname}" : self.pathname
  path += ":nth(#{own_rank})" if same_tag >= 2
  path
end

#doc? ⇒ Boolean

Is this object the enclosing HTML or XML document?

Returns:

  • (Boolean)


7
# File 'lib/hpricot/traverse.rb', line 7

# True when this object includes Doc::Trav.
def doc?
  is_a?(Doc::Trav)
end

#doctype? ⇒ Boolean

Is this object a doctype tag?

Returns:

  • (Boolean)


15
# File 'lib/hpricot/traverse.rb', line 15

# True when this object includes DocType::Trav.
def doctype?
  is_a?(DocType::Trav)
end

#elem? ⇒ Boolean

Is this object an HTML or XML element?

Returns:

  • (Boolean)


9
# File 'lib/hpricot/traverse.rb', line 9

# True when this object includes Elem::Trav.
def elem?
  is_a?(Elem::Trav)
end

#following ⇒ Object

Find all nodes which follow the current one.



114
115
116
117
118
# File 'lib/hpricot/traverse.rb', line 114

# Returns an Elements holding every sibling that comes after this node in
# the parent's children list (empty when this node is the last child).
def following
  siblings = parent.children
  start = siblings.index(self) + 1
  Elements[*siblings[start..-1]]
end

#get_subnode(*indexes) ⇒ Object



138
139
140
141
142
143
144
# File 'lib/hpricot/traverse.rb', line 138

# Walks down from this node, applying +get_subnode_internal+ once per
# entry in +indexes+, each time on the node returned by the previous step.
#
# Returns the node reached at the end (this node when no index is given).
def get_subnode(*indexes)
  indexes.inject(self) do |node, index|
    node.get_subnode_internal(index)
  end
end

#html(inner = nil, &blk) ⇒ Object Also known as: inner_html

Builds an HTML string from the contents of this node.



168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/hpricot/traverse.rb', line 168

# Reads or replaces the contents of this node.
#
# With no argument and no block: returns the concatenation of each child's
# +output("")+, or "" when the node has no (non-nil) children.
#
# With +inner+ or a block: marks the node altered, replaces the children
# (an Array is used as-is, anything else goes through #make), then
# reparents the new children and returns the result of +reparent+.
def html(inner = nil, &blk)
  unless inner || blk
    return "" unless respond_to?(:children) && children
    return children.map { |child| child.output("") }.join
  end

  altered!
  self.children = inner.is_a?(Array) ? inner : make(inner, &blk)
  reparent self.children
end

#index(name) ⇒ Object



47
48
49
50
51
52
53
54
55
56
# File 'lib/hpricot/traverse.rb', line 47

# Finds the position of the first child matching +name+.
#
# "*" always yields 0. Otherwise a child matches when it responds to
# +name+ and its name equals the argument, or when it is a text node and
# the argument is "text()".
#
# Returns the zero-based position, or -1 when nothing matches (including
# when +children+ is nil).
def index(name)
  return 0 if name == "*"
  (children || []).each_with_index do |child, pos|
    named = child.respond_to?(:name) && name == child.name
    return pos if named || (child.text? && name == "text()")
  end
  -1
end

#inner_html=(inner) ⇒ Object Also known as: innerHTML=

Inserts new contents into the current node, based on the HTML contained in string inner.



191
192
193
# File 'lib/hpricot/traverse.rb', line 191

# Replaces this node's contents by delegating to #html; a nil or false
# +inner+ is passed along as an empty Array.
def inner_html=(inner)
  replacement = inner || []
  html(replacement)
end

#inner_text ⇒ Object Also known as: innerText

Builds a string from the text contained in this node. All HTML elements are removed.



158
159
160
161
162
163
164
# File 'lib/hpricot/traverse.rb', line 158

# Concatenates the +inner_text+ of every child.
#
# Returns "" when this node has no +children+ method or its children
# are nil.
def inner_text
  return "" unless respond_to?(:children) && children
  children.map { |child| child.inner_text }.join
end

#make(input = nil, &blk) ⇒ Object

Parses an HTML string, making an HTML fragment based on the options used to create the container document.



25
26
27
28
29
30
31
# File 'lib/hpricot/traverse.rb', line 25

# Turns +input+ (or the block) into nodes.
#
# Delegates to the parent's +make+ when there is a parent that responds to
# it; otherwise falls back to Hpricot.make and returns the children of
# what it produces.
def make(input = nil, &blk)
  owner = parent
  return owner.make(input, &blk) if owner && owner.respond_to?(:make)
  Hpricot.make(input, &blk).children
end

#next ⇒ Object Also known as: next_node

Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.



91
92
93
94
# File 'lib/hpricot/traverse.rb', line 91

# Returns the sibling immediately after this node in the parent's children
# list (text nodes, comments and so on included).
#
# Returns nil when this node is the last child, has no parent, or the
# parent has no children list. The parent is checked before it is
# dereferenced, so an orphan node yields nil instead of raising.
def next
  sib = parent && parent.children
  sib[sib.index(self) + 1] if sib
end

#node_position ⇒ Object



242
243
244
# File 'lib/hpricot/traverse.rb', line 242

# Returns this node's zero-based position among all of its parent's
# children.
def node_position
  siblings = parent.children
  siblings.index(self)
end

#nodes_at(*pos) ⇒ Object

Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).

This method also accepts ranges and sets of numbers.

ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
ele.nodes_at(0, 5..6) # the current node and two others


67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/hpricot/traverse.rb', line 67

# Collects siblings (and possibly this node) by their offset from this
# node in the parent's children list.
#
# pos:: any mix of integer offsets and Ranges of offsets; 0 is this node,
#       negative values come before it. A Range whose begin is a String is
#       translated into offsets by looking both endpoints up with the
#       parent's +index+.
#
# Returns an Elements with the selected nodes in document order.
# Nothing is written to stdout (a leftover debug print was removed).
def nodes_at(*pos)
  sib = parent.children
  si = sib.index(self)
  offsets = pos.map do |r|
    if r.is_a?(Range) && r.begin.is_a?(String)
      Range.new(parent.index(r.begin) - si, parent.index(r.end) - si, r.exclude_end?)
    else
      r
    end
  end
  # === lets an Integer match itself and a Range match its members,
  # mirroring a `case offset when *offsets` dispatch.
  picked = sib.select.with_index do |_node, i|
    offsets.any? { |wanted| wanted === i - si }
  end
  Elements[*picked]
end

#position ⇒ Object



246
247
248
# File 'lib/hpricot/traverse.rb', line 246

# Returns this node's zero-based position among the parent's children that
# share its +pathname+ (as reported by the parent's +children_of_type+).
def position
  same_tag_siblings = parent.children_of_type(self.pathname)
  same_tag_siblings.index(self)
end

#preceding ⇒ Object

Find all preceding nodes.



107
108
109
110
111
# File 'lib/hpricot/traverse.rb', line 107

# Returns an Elements holding every sibling that comes before this node in
# the parent's children list (empty when this node is the first child).
def preceding
  siblings = parent.children
  cut = siblings.index(self)
  Elements[*siblings[0...cut]]
end

#previous ⇒ Object Also known as: previous_node

Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.



99
100
101
102
103
# File 'lib/hpricot/traverse.rb', line 99

# Returns the sibling immediately before this node in the parent's
# children list (text nodes, comments and so on included).
#
# Returns nil when this node is the first child, has no parent, or the
# parent has no children list. The nil checks run before the list is
# used, so those cases yield nil instead of raising.
def previous
  sib = parent && parent.children
  return unless sib
  x = sib.index(self) - 1
  # A negative index would wrap around to the end of the array.
  sib[x] if x >= 0
end

#procins? ⇒ Boolean

Is this object an XML processing instruction?

Returns:

  • (Boolean)


17
# File 'lib/hpricot/traverse.rb', line 17

# True when this object includes ProcIns::Trav.
def procins?
  is_a?(ProcIns::Trav)
end

#search(expr, &blk) ⇒ Object Also known as: /

Searches this node for all elements matching the CSS or XPath expr. Returns an Elements array containing the matching nodes. If blk is given, it is used to iterate through the matching set.



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/hpricot/traverse.rb', line 254

# Searches from this node for everything matching the CSS or XPath +expr+.
#
# expr:: a selector (converted with to_s), or a Range whose two endpoints
#        are each resolved with #at and handed to Elements.expand.
# blk::  optional block; when given, every match is passed to it.
#
# Returns an Elements built from the matches, or +self+ when a block is
# given.
def search(expr, &blk)
  if Range === expr
    return Elements.expand(at(expr.begin), at(expr.end), expr.exclude_end?)
  end
  # last:  truthy right after a combinator was consumed; reset to nil after
  #        a name step.
  # nodes: current working set.
  # done:  matches of alternatives already finished (before a "," or "|").
  # hist:  expr as it stood after each pass, used to detect lack of progress.
  last = nil
  nodes = [self]
  done = []
  expr = expr.to_s
  hist = []
  until expr.empty?
      # Trim whitespace and drop a leading "//" before dispatching.
      expr = clean_path(expr)
      expr.gsub!(%r!^//!, '')

      case expr
      # ".." or "/..": replace each node with its parent.
      when %r!^/?\.\.!
          last = expr = $'
          nodes.map! { |node| node.parent }
      # ">" or "/": replace the set with the children of each node.
      when %r!^[>/]\s*!
          last = expr = $'
          nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact]
      # "+": replace each node with the sibling right after it.
      when %r!^\+!
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[siblings.index(node)+1]
          end
          nodes.compact!
      # "~": replace each node with all siblings after it.
      when %r!^~!
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[(siblings.index(node)+1)..-1]
          end
          nodes.flatten!
      # "," or "|": stash the current matches (minus self, if it is still
      # first) in done and restart the next alternative from self.
      when %r!^[|,]!
          last = expr = " #$'"
          nodes.shift if nodes.first == self
          done += nodes
          nodes = [self]
      else
          # Name step: optional "#" or "." prefix followed by a name.
          m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a
          after = $'
          # First ":word" found in the remainder, if any.
          mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
          # NOTE(review): oop is never read within this method.
          oop = false
          # A ":word" that is neither ":not" nor a registered filter method
          # is folded into the name itself.
          # NOTE(review): $' below presumably reflects the String#[] match
          # just above — confirm.
          if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
            after = $' 
            m[2] += mt
            expr = after
          end
          if m[1] == '#'
              # Id lookup replaces the whole working set.
              oid = get_element_by_id(m[2])
              nodes = oid ? [oid] : []
              expr = after
          else
              # Wildcard when followed by "()", when no name was given, or
              # for a "." (class) selector.
              m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "."
              ret = []
              nodes.each do |node|
                  case m[2]
                  when '*'
                      node.traverse_element { |n| ret << n }
                  else
                      if node.respond_to? :get_elements_by_tag_name
                        # The node itself is removed from its own matches
                        # unless a combinator was just consumed.
                        ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)]
                      end
                  end
              end
              nodes = ret
          end
          last = nil
      end

      # Stop when this pass left the expression unchanged.
      hist << expr
      break if hist[-1] == hist[-2]
      # NOTE(review): presumably Elements.filter applies the filters at the
      # head of expr and returns the narrowed nodes plus the remainder —
      # confirm against Elements.
      nodes, expr = Elements.filter(nodes, expr)
  end
  nodes = done + nodes.flatten.uniq
  if blk
      nodes.each(&blk)
      self
  else
      Elements[*nodes]
  end
end

#swap(html = nil, &blk) ⇒ Object

Replace this element and its contents with the nodes contained in the html string.



133
134
135
136
# File 'lib/hpricot/traverse.rb', line 133

# Replaces this node in its parent with nodes built from +html+ (or from
# the block) via #make, after marking the parent as altered.
#
# Returns the result of the parent's +replace_child+.
def swap(html = nil, &blk)
  container = parent
  container.altered!
  container.replace_child(self, make(html, &blk))
end

#text? ⇒ Boolean

Is this object an HTML text node?

Returns:

  • (Boolean)


11
# File 'lib/hpricot/traverse.rb', line 11

# True when this object includes Text::Trav.
def text?
  is_a?(Text::Trav)
end

#to_html ⇒ Object Also known as: to_s

Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io) as a method on this object.



36
37
38
# File 'lib/hpricot/traverse.rb', line 36

# Renders this node by calling +output+ with a fresh empty String and
# returns what +output+ returns.
def to_html
  buffer = ""
  output(buffer)
end

#to_original_html ⇒ Object

Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.



43
44
45
# File 'lib/hpricot/traverse.rb', line 43

# Renders this node by calling +output+ with a fresh empty String and the
# :preserve option set to true; returns what +output+ returns.
def to_original_html
  buffer = ""
  output(buffer, :preserve => true)
end

#to_plain_text ⇒ Object

Builds a string from the text contained in this node. All HTML elements are removed.



148
149
150
151
152
153
154
# File 'lib/hpricot/traverse.rb', line 148

# Concatenates the +to_plain_text+ of every child, strips surrounding
# whitespace and collapses runs of two or more newlines into exactly two.
#
# Returns "" when this node has no +children+ method or its children
# are nil.
def to_plain_text
  return "" unless respond_to?(:children) && children
  text = children.map { |child| child.to_plain_text }.join
  text.strip.gsub(/\n{2,}/, "\n\n")
end

#traverse_element(*names, &block) ⇒ Object

traverse_element traverses elements in the tree. It yields elements in depth first order.

If names are empty, it yields all elements. If non-empty names are given, they should be a list of universal names.

A nested element is yielded in depth first order as follows.

t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>') 
t.traverse_element("a", "c") {|e| p e}
# =>
{elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
{emptyelem <a id="1">}
{emptyelem <c id="2">}

Universal names are specified as follows.

t = Hpricot(<<'End')
<html>
<meta name="robots" content="index,nofollow">
<meta name="author" content="Who am I?">    
</html>
End
t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# =>
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}


374
375
376
377
378
379
380
381
382
383
# File 'lib/hpricot/traverse.rb', line 374

# Yields elements of the tree to +block+.
#
# With no +names+ the work is handed to +traverse_all_element+; otherwise
# the names are turned into a Hash of name => true and passed to
# +traverse_some_element+.
#
# Always returns nil.
def traverse_element(*names, &block) # :yields: element
  if names.empty?
    traverse_all_element(&block)
  else
    wanted = names.each_with_object({}) { |name, set| set[name] = true }
    traverse_some_element(wanted, &block)
  end
  nil
end

#traverse_text(&block) ⇒ Object

traverse_text traverses texts in the tree



680
681
682
683
# File 'lib/hpricot/traverse.rb', line 680

# Hands +block+ to +traverse_text_internal+ and discards its result.
#
# Always returns nil.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  return nil
end

#xmldecl? ⇒ Boolean

Is this object an XML declaration?

Returns:

  • (Boolean)


13
# File 'lib/hpricot/traverse.rb', line 13

# True when this object includes XMLDecl::Trav.
def xmldecl?
  is_a?(XMLDecl::Trav)
end

#xpath ⇒ Object

Builds a unique XPath string for this node, from the root of the document containing it.



209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/hpricot/traverse.rb', line 209

# Builds an XPath string for this node.
#
# An element carrying an +id+ attribute short-circuits to
# "//<name>[@id='<id>']". Otherwise the parent's xpath is joined with this
# node's pathname, and "[<n>]" is appended when two or more siblings share
# that pathname; <n> is the one-based rank of this node among them.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  same_tag = 0 # siblings seen so far with the same pathname
  own_rank = 0 # value of same_tag at the moment this node was reached
  siblings = parent.children
  if siblings
    siblings.each do |sibling|
      own_rank = same_tag if sibling == self
      same_tag += 1 if sibling.pathname == self.pathname
    end
  end
  # File.join is used purely as a "/"-joining helper here.
  path = File.join(parent.xpath, self.pathname)
  path += "[#{own_rank + 1}]" if same_tag >= 2
  path
end