Module: Hpricot::Traverse

Included in:
Container::Trav, Leaf::Trav
Defined in:
lib/hpricot/traverse.rb,
lib/hpricot/modules.rb,
lib/hpricot/elements.rb,
lib/hpricot/traverse.rb

Overview

:startdoc:

Class Method Summary collapse

Instance Method Summary collapse

Class Method Details

.filter(tok, &blk) ⇒ Object



262
263
264
# File 'lib/hpricot/elements.rb', line 262

# Defines a method named "filter[<tok>]" whose body is +blk+.
#
# A String token is used verbatim in the method name; any other token
# (a Symbol, for instance) contributes its #inspect form instead.
def self.filter(tok, &blk)
  label = tok.is_a?(String) ? tok : tok.inspect
  define_method("filter[#{label}]", &blk)
end

Instance Method Details

#after(html) ⇒ Object

Adds elements immediately after this element, contained in the html string.



76
77
78
# File 'lib/hpricot/traverse.rb', line 76

# Adds the elements described by the +html+ string immediately after this
# element. The string is converted with Hpricot.make and handed to the
# parent's insert_after, whose result is returned.
def after(html)
  container = parent
  fragment = Hpricot.make(html)
  container.insert_after(fragment, self)
end

#at(expr) ⇒ Object Also known as: %

Find the first matching node for the CSS or XPath expr string.



283
284
285
# File 'lib/hpricot/traverse.rb', line 283

# Finds the first node matching the CSS or XPath +expr+ string.
# Runs #search and returns the first entry of its result.
def at(expr)
  matches = search(expr)
  matches.first
end

#before(html) ⇒ Object

Adds elements immediately before this element, contained in the html string.



81
82
83
# File 'lib/hpricot/traverse.rb', line 81

# Adds the elements described by the +html+ string immediately before this
# element. The string is converted with Hpricot.make and handed to the
# parent's insert_before, whose result is returned.
def before(html)
  # Must be insert_before: insert_after would make this identical to #after.
  parent.insert_before(Hpricot.make(html), self)
end

#bogusetag? ⇒ Boolean

Is this object a stranded end tag?

Returns:

  • (Boolean)


21
# File 'lib/hpricot/traverse.rb', line 21

# Is this object a stranded end tag?
# True exactly when the receiver is a BogusETag::Trav.
def bogusetag?
  is_a?(BogusETag::Trav)
end

#children_of_type(tag_name) ⇒ Object

Find children of a given tag_name.

ele.children_of_type('p')
  #=> [...array of paragraphs...]


332
333
334
335
336
337
338
# File 'lib/hpricot/traverse.rb', line 332

# Finds the children whose pathname equals +tag_name+.
#
#   ele.children_of_type('p')
#     #=> [...array of paragraphs...]
#
# Children that do not respond to +pathname+ are skipped. Returns nil when
# the receiver has no +children+ method.
def children_of_type(tag_name)
  return unless respond_to? :children

  children.find_all do |child|
    child.respond_to?(:pathname) && child.pathname == tag_name
  end
end

#clean_path(path) ⇒ Object



148
149
150
# File 'lib/hpricot/traverse.rb', line 148

# Returns a copy of +path+ with edge whitespace removed.
#
# The pattern uses the line anchors ^ and $, so whitespace is trimmed at the
# start and end of every line of +path+, not only at the ends of the string.
def clean_path(path)
  edge_whitespace = /^\s+|\s+$/
  path.gsub(edge_whitespace, '')
end

#comment? ⇒ Boolean

Is this object a comment?

Returns:

  • (Boolean)


19
# File 'lib/hpricot/traverse.rb', line 19

# Is this object a comment?
# True exactly when the receiver is a Comment::Trav.
def comment?
  is_a?(Comment::Trav)
end

#css_path ⇒ Object

Builds a unique CSS string for this node, from the root of the document containing it.



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/hpricot/traverse.rb', line 171

# Builds a CSS selector string for this node, starting from the root of the
# document containing it.
#
# An element carrying an +id+ attribute is addressed directly as "#<id>".
# Otherwise the selector is the parent's css_path joined with " > " and this
# node's pathname (or the bare pathname when the parent's css_path is nil).
# When two or more siblings share the pathname, ":nth(k)" is appended, where
# k is the zero-based position of this node among those siblings.
def css_path
  return "##{get_attribute('id')}" if elem? && has_attribute?('id')

  same_name_count = 0
  own_index = 0
  parent.children.each do |sibling|
    # Record how many same-named siblings precede this node.
    own_index = same_name_count if sibling == self
    same_name_count += 1 if sibling.pathname == self.pathname
  end
  parent_path = parent.css_path
  selector = parent_path ? "#{parent_path} > #{self.pathname}" : self.pathname
  selector += ":nth(#{own_index})" if same_name_count >= 2
  selector
end

#doc? ⇒ Boolean

Is this object the enclosing HTML or XML document?

Returns:

  • (Boolean)


7
# File 'lib/hpricot/traverse.rb', line 7

# Is this object the enclosing HTML or XML document?
# True exactly when the receiver is a Doc::Trav.
def doc?
  is_a?(Doc::Trav)
end

#doctype? ⇒ Boolean

Is this object a doctype tag?

Returns:

  • (Boolean)


15
# File 'lib/hpricot/traverse.rb', line 15

# Is this object a doctype tag?
# True exactly when the receiver is a DocType::Trav.
def doctype?
  is_a?(DocType::Trav)
end

#elem? ⇒ Boolean

Is this object an HTML or XML element?

Returns:

  • (Boolean)


9
# File 'lib/hpricot/traverse.rb', line 9

# Is this object an HTML or XML element?
# True exactly when the receiver is an Elem::Trav.
def elem?
  is_a?(Elem::Trav)
end

#get_subnode(*indexes) ⇒ Object



93
94
95
96
97
98
99
# File 'lib/hpricot/traverse.rb', line 93

# Walks down from this node, applying get_subnode_internal once per entry of
# +indexes+ (left to right) to whatever the previous step returned.
# Returns the node reached; with no indexes that is the receiver itself.
def get_subnode(*indexes)
  indexes.inject(self) do |node, index|
    node.get_subnode_internal(index)
  end
end

#inner_html ⇒ Object Also known as: innerHTML

Builds an HTML string from the contents of this node.



119
120
121
122
123
# File 'lib/hpricot/traverse.rb', line 119

# Builds an HTML string from the contents of this node by joining the
# output("") of every child. Returns nil when the receiver has no
# +children+ method.
def inner_html
  return unless respond_to? :children

  rendered = children.map { |child| child.output("") }
  rendered.join
end

#inner_html=(inner) ⇒ Object Also known as: innerHTML=

Inserts new contents into the current node, based on the HTML contained in string inner.



128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/hpricot/traverse.rb', line 128

# Inserts new contents into the current node.
#
# * String or IO: parsed with Hpricot.parse; the parsed children are adopted.
# * Array:        used as the new children as-is.
# * nil:          the children become an empty list.
# * anything else leaves the current children untouched.
#
# altered! is invoked up front, and the resulting children are passed to
# reparent at the end in every case.
def inner_html=(inner)
  altered!
  if inner.nil?
    self.children = []
  elsif String === inner || IO === inner
    self.children = Hpricot.parse(inner).children
  elsif Array === inner
    self.children = inner
  end
  reparent self.children
end

#inner_text ⇒ Object Also known as: innerText

Builds a string from the text contained in this node. All HTML elements are removed.



111
112
113
114
115
# File 'lib/hpricot/traverse.rb', line 111

# Builds a string from the text contained in this node by joining the
# inner_text of every child. Returns nil when the receiver has no
# +children+ method.
def inner_text
  return unless respond_to? :children

  pieces = children.map { |child| child.inner_text }
  pieces.join
end

#next_node ⇒ Object

Returns the node neighboring this node to the south: just below it. This method includes text nodes and comments and such.



62
63
64
65
# File 'lib/hpricot/traverse.rb', line 62

# Returns the node neighboring this node to the south: just below it.
# Text nodes, comments and the like are included.
#
# Returns nil when this node has no parent or is the last child.
def next_node
  # Check the parent before touching its children; a guard placed after
  # parent.children can never prevent the NoMethodError on a nil parent.
  return unless parent

  sib = parent.children
  sib[sib.index(self) + 1]
end

#node_position ⇒ Object



187
188
189
# File 'lib/hpricot/traverse.rb', line 187

# Returns the index of this node within its parent's children, as reported
# by the children collection's #index.
def node_position
  siblings = parent.children
  siblings.index(self)
end

#nodes_at(*pos) ⇒ Object

Puts together an array of neighboring nodes based on their proximity to this node. So, for example, to get the next node, you could use nodes_at(1). Or, to get the previous node, use nodes_at(-1).

This method also accepts ranges and sets of numbers.

ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
ele.nodes_at(0, 5..6) # the current node and two others


46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/hpricot/traverse.rb', line 46

# Puts together an Elements of neighboring nodes based on their offset from
# this node among the parent's children: nodes_at(1) is the next node,
# nodes_at(-1) the previous one, and 0 is this node itself.
#
# Each entry of +pos+ is matched against the offset with ===, so integers and
# ranges can be mixed freely:
#
#   ele.nodes_at(-3..-1, 1..3) # gets three nodes before and three after
#   ele.nodes_at(1, 5, 7) # gets three nodes at offsets below the current node
#   ele.nodes_at(0, 5..6) # the current node and two others
def nodes_at(*pos)
  siblings = parent.children
  own_index = siblings.index(self)
  chosen = siblings.select.with_index do |_node, idx|
    offset = idx - own_index
    pos.any? { |wanted| wanted === offset }
  end
  Elements[*chosen]
end

#position ⇒ Object



191
192
193
# File 'lib/hpricot/traverse.rb', line 191

# Returns the index of this node among the parent's children that share its
# pathname (as returned by the parent's children_of_type).
def position
  same_named = parent.children_of_type(self.pathname)
  same_named.index(self)
end

#previous_node ⇒ Object

Returns the node neighboring this node to the north: just above it. This method includes text nodes and comments and such.



69
70
71
72
73
# File 'lib/hpricot/traverse.rb', line 69

# Returns the node neighboring this node to the north: just above it.
# Text nodes, comments and the like are included.
#
# Returns nil when this node has no parent, when the parent has no children
# collection, or when this node is the first child.
def previous_node
  # Resolve the sibling list defensively before indexing into it; checking
  # it only after sib.index(self) would be too late to avoid a NoMethodError.
  sib = parent && parent.children
  return unless sib

  x = sib.index(self) - 1
  # A negative index would wrap around to the last sibling, so reject it.
  sib[x] if x >= 0
end

#procins? ⇒ Boolean

Is this object an XML processing instruction?

Returns:

  • (Boolean)


17
# File 'lib/hpricot/traverse.rb', line 17

# Is this object an XML processing instruction?
# True exactly when the receiver is a ProcIns::Trav.
def procins?
  is_a?(ProcIns::Trav)
end

#search(expr, &blk) ⇒ Object Also known as: /

Searches this node for all elements matching the CSS or XPath expr. Returns an Elements array containing the matching nodes. If blk is given, it is used to iterate through the matching set.



199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/hpricot/traverse.rb', line 199

# Searches this node for everything matching the CSS or XPath +expr+.
#
# +expr+ is converted with #to_s and then consumed from the left: each pass of
# the loop strips one leading combinator or selector token and updates the
# working node set accordingly, until the expression is empty or a pass makes
# no progress.
#
# Returns an Elements built from the matches. When a block is given, every
# match is passed to it instead and +self+ is returned.
def search(expr, &blk)
  # Set to the remaining expression (truthy) right after a combinator is
  # consumed and reset to nil after a selector token; read further down when
  # collecting elements by tag name.
  last = nil
  # Working set of nodes that the next token is applied to.
  nodes = [self]
  # Results of alternatives already completed (those before a "|" or ",").
  done = []
  expr = expr.to_s
  # Remaining expression after each pass, used to detect a stalled parse.
  hist = []
  until expr.empty?
      expr = clean_path(expr)
      # A leading "//" is simply dropped.
      expr.gsub!(%r!^//!, '')

      case expr
      # ".." (optionally written "/.."): move every node up to its parent.
      when %r!^/?\.\.!
          last = expr = $'
          nodes.map! { |node| node.parent }
      # ">" or "/": replace the set with the children of every node that
      # has a children method.
      when %r!^[>/]!
          last = expr = $'
          nodes = Elements[*nodes.map { |node| node.children if node.respond_to? :children }.flatten.compact]
      # "+": replace each node with the sibling directly after it; nodes
      # without one yield nil and are removed by compact!.
      when %r!^\+!
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[siblings.index(node)+1]
          end
          nodes.compact!
      # "~": replace each node with all of the siblings that follow it.
      when %r!^~!
          last = expr = $'
          nodes.map! do |node|
              siblings = node.parent.children
              siblings[(siblings.index(node)+1)..-1]
          end
          nodes.flatten!
      # "|" or ",": alternative separator. The nodes collected so far are
      # moved to +done+ (minus a leading +self+) and matching restarts from
      # +self+; the rest of the expression is re-prefixed with a space.
      when %r!^[|,]!
          last = expr = " #$'"
          nodes.shift if nodes.first == self
          done += nodes
          nodes = [self]
      else
          # m[1] is an optional "#" or "." prefix, m[2] the name that follows
          # (possibly empty); +after+ is whatever comes after them.
          m = expr.match(%r!^([#.]?)([a-z0-9\\*_-]*)!i).to_a
          after = $'
          # First ":name" token found anywhere in +after+ (pattern is unanchored).
          mt = after[%r!:[a-z0-9\\*_-]+!i, 0]
          # NOTE(review): +oop+ is assigned but never read in this method.
          oop = false
          # A ":name" that is neither ":not" nor a registered "filter[:name]"
          # method on Traverse is appended to the name itself, and parsing
          # continues after it (presumably to support names containing a
          # colon, e.g. namespaced tags -- confirm).
          if mt and not (mt == ":not" or Traverse.method_defined? "filter[#{mt}]")
            after = $' 
            m[2] += mt
            expr = after
          end
          if m[1] == '#'
              # "#id": looked up through get_element_by_id on this node; the
              # working set becomes that single element or empty.
              oid = get_element_by_id(m[2])
              nodes = oid ? [oid] : []
              expr = after
          else
              # Treat the name as a wildcard when it is followed by "()", is
              # empty, or the prefix is "." (a class selector).
              m[2] = "*" if after =~ /^\(\)/ || m[2] == "" || m[1] == "."
              ret = []
              nodes.each do |node|
                  case m[2]
                  when '*'
                      # Collect everything traverse_element yields for this node.
                      node.traverse_element { |n| ret << n }
                  else
                      if node.respond_to? :get_elements_by_tag_name
                        # The node itself is removed from its own results
                        # unless a combinator immediately preceded this token.
                        ret += [*node.get_elements_by_tag_name(m[2])] - [*(node unless last)]
                      end
                  end
              end
              nodes = ret
          end
          last = nil
      end

      # Stop when two consecutive passes leave the same remaining expression,
      # i.e. nothing more could be consumed.
      hist << expr
      break if hist[-1] == hist[-2]
      # Elements.filter is defined elsewhere; it returns the (presumably
      # narrowed) node set together with the expression left to process.
      nodes, expr = Elements.filter(nodes, expr)
  end
  # Earlier alternatives first, then the flattened, de-duplicated current set.
  nodes = done + nodes.flatten.uniq
  if blk
      nodes.each(&blk)
      self
  else
      Elements[*nodes]
  end
end

#swap(html) ⇒ Object

Replace this element and its contents with the nodes contained in the html string.



88
89
90
91
# File 'lib/hpricot/traverse.rb', line 88

# Replaces this element and its contents with the nodes described by the
# +html+ string. The parent is flagged with altered! first, then asked to
# replace this node with the result of Hpricot.make(html); the value of
# replace_child is returned.
def swap(html)
  container = parent
  container.altered!
  container.replace_child(self, Hpricot.make(html))
end

#text? ⇒ Boolean

Is this object an HTML text node?

Returns:

  • (Boolean)


11
# File 'lib/hpricot/traverse.rb', line 11

# Is this object an HTML text node?
# True exactly when the receiver is a Text::Trav.
def text?
  is_a?(Text::Trav)
end

#to_html ⇒ Object Also known as: to_s

Builds an HTML string from this node and its contents. If you need to write to a stream, try calling output(io) as a method on this object.



26
27
28
# File 'lib/hpricot/traverse.rb', line 26

# Builds an HTML string from this node and its contents by calling output
# with a fresh empty string. To write to a stream instead, call output(io)
# on this object directly.
def to_html
  buffer = ""
  output(buffer)
end

#to_original_html ⇒ Object

Attempts to preserve the original HTML of the document, only outputting new tags for elements which have changed.



33
34
35
# File 'lib/hpricot/traverse.rb', line 33

# Attempts to preserve the original HTML of the document, only outputting new
# tags for elements which have changed. Implemented as a call to output with
# a fresh empty string and the :preserve option set to true.
def to_original_html
  buffer = ""
  output(buffer, preserve: true)
end

#to_plain_text ⇒ Object

Builds a string from the text contained in this node. All HTML elements are removed.



103
104
105
106
107
# File 'lib/hpricot/traverse.rb', line 103

# Builds a string from the text contained in this node.
#
# The to_plain_text of every child is joined, the result is stripped, and
# every run of two or more newlines is collapsed to exactly two. Returns nil
# when the receiver has no +children+ method.
def to_plain_text
  return unless respond_to? :children

  combined = children.map { |child| child.to_plain_text }.join
  combined.strip.gsub(/\n{2,}/, "\n\n")
end

#traverse_element(*names, &block) ⇒ Object

traverse_element traverses elements in the tree. It yields elements in depth first order.

If names is empty, it yields all elements. If names are given, they should be a list of universal names.

A nested element is yielded in depth first order as follows.

t = Hpricot('<a id=0><b><a id=1 /></b><c id=2 /></a>') 
t.traverse_element("a", "c") {|e| p e}
# =>
{elem <a id="0"> {elem <b> {emptyelem <a id="1">} </b>} {emptyelem <c id="2">} </a>}
{emptyelem <a id="1">}
{emptyelem <c id="2">}

Universal names are specified as follows.

t = Hpricot(<<'End')
<html>
<meta name="robots" content="index,nofollow">
<meta name="author" content="Who am I?">    
</html>
End
t.traverse_element("{http://www.w3.org/1999/xhtml}meta") {|e| p e}
# =>
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="robots" content="index,nofollow">}
{emptyelem <{http://www.w3.org/1999/xhtml}meta name="author" content="Who am I?">}


316
317
318
319
320
321
322
323
324
325
# File 'lib/hpricot/traverse.rb', line 316

# Traverses elements in the tree, yielding each one to the block.
#
# With no +names+, the work is delegated to traverse_all_element. Otherwise
# the names are collected into a Hash (name => true) that is handed to
# traverse_some_element together with the block.
#
# Always returns nil.
def traverse_element(*names, &block) # :yields: element
  unless names.empty?
    wanted = names.each_with_object({}) { |name, set| set[name] = true }
    traverse_some_element(wanted, &block)
    return nil
  end
  traverse_all_element(&block)
  nil
end

#traverse_text(&block) ⇒ Object

traverse_text traverses texts in the tree



605
606
607
608
# File 'lib/hpricot/traverse.rb', line 605

# Traverses the texts in the tree, yielding each one to the block.
# The work is delegated to traverse_text_internal; whatever that returns is
# discarded and nil is returned instead.
def traverse_text(&block) # :yields: text
  traverse_text_internal(&block)
  return nil
end

#xmldecl? ⇒ Boolean

Is this object an XML declaration?

Returns:

  • (Boolean)


13
# File 'lib/hpricot/traverse.rb', line 13

# Is this object an XML declaration?
# True exactly when the receiver is an XMLDecl::Trav.
def xmldecl?
  is_a?(XMLDecl::Trav)
end

#xpath ⇒ Object

Builds a unique XPath string for this node, from the root of the document containing it.



154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/hpricot/traverse.rb', line 154

# Builds an XPath string for this node, starting from the root of the
# document containing it.
#
# An element carrying an +id+ attribute is addressed directly as
# "//<name>[@id='<id>']". Otherwise the parent's xpath and this node's
# pathname are combined with File.join; when two or more siblings share the
# pathname, a one-based "[k]" index among those siblings is appended.
def xpath
  return "//#{self.name}[@id='#{get_attribute('id')}']" if elem? && has_attribute?('id')

  same_name_count = 0
  own_index = 0
  parent.children.each do |sibling|
    # Record how many same-named siblings precede this node.
    own_index = same_name_count if sibling == self
    same_name_count += 1 if sibling.pathname == self.pathname
  end
  path = File.join(parent.xpath, self.pathname)
  path += "[#{own_index + 1}]" if same_name_count >= 2
  path
end