Module: HTree::Doc::Trav

Includes:
Container::Trav
Included in:
HTree::Doc, Loc
Defined in:
lib/htree/traverse.rb,
lib/htree/modules.rb,
lib/htree/traverse.rb,
lib/htree/traverse.rb,
lib/htree/traverse.rb

Overview

:stopdoc:

Instance Method Summary collapse

Methods included from Container::Trav

#each_child, #each_child_with_index, #each_hyperlink, #each_hyperlink_uri, #each_uri, #filter, #find_element, #traverse_element, #traverse_text_internal

Methods included from Traverse

#bogusetag?, #comment?, #doc?, #doctype?, #elem?, #get_subnode, #procins?, #text?, #traverse_text, #xmldecl?

Instance Method Details

#authorObject

author searches for the document's author and returns it as text. It returns nil if no author is found.

author searches the following information, in this order.

  • <meta name="author" content="author-name"> in HTML

  • <link rev="made" title="author-name"> in HTML

  • <dc:creator>author-name</dc:creator> in RSS

  • <dc:publisher>author-name</dc:publisher> in RSS

  • <author><name>author-name</name></author> in Atom



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/htree/traverse.rb', line 305

# Looks for an author name in the document and returns the first non-empty
# candidate, or nil when no source yields one.
#
# Sources are tried in this order:
# 1. <meta name="author" content="..."> (plain or XHTML-namespaced)
# 2. <link rev="made" title="..."> (plain or XHTML-namespaced)
# 3. dc:creator elements, then dc:publisher elements, under an RSS 1.0 channel
# 4. <name> elements under an <author>, when a top-level child is an Atom feed
#
# The returned value is whatever +strip+ returns on the attribute value or
# extracted text of the matching element.
def author
  traverse_element('meta',
    '{http://www.w3.org/1999/xhtml}meta') do |meta|
    begin
      if meta.fetch_attr('name').downcase == 'author'
        content = meta.fetch_attribute('content').strip
        return content unless content.empty?
      end
    rescue IndexError
      # Presumably raised when the attribute is absent (TODO confirm);
      # either way this element is skipped and the scan continues.
    end
  end

  traverse_element('link',
    '{http://www.w3.org/1999/xhtml}link') do |link|
    begin
      if link.fetch_attr('rev').downcase == 'made'
        made_by = link.fetch_attribute('title').strip
        return made_by unless made_by.empty?
      end
    rescue IndexError
      # Same best-effort handling as for <meta> above.
    end
  end

  channel = find_element('{http://purl.org/rss/1.0/}channel')
  if channel
    # Order matters: every dc:creator is examined before any dc:publisher.
    ['{http://purl.org/dc/elements/1.1/}creator',
     '{http://purl.org/dc/elements/1.1/}publisher'].each do |dc_name|
      channel.traverse_element(dc_name) do |node|
        begin
          text = node.extract_text.strip
          return text unless text.empty?
        rescue IndexError
        end
      end
    end
  end

  ['http://www.w3.org/2005/Atom', 'http://purl.org/atom/ns#'].each do |xmlns|
    each_child do |top|
      next unless top.elem? && top.name == "{#{xmlns}}feed"

      # NOTE(review): the author lookup runs on the receiver, not on +top+;
      # kept as-is to preserve behavior -- confirm whether that is intended.
      feed_author = find_element("{#{xmlns}}author")
      next unless feed_author

      feed_author.traverse_element("{#{xmlns}}name") do |node|
        begin
          text = node.extract_text.strip
          return text unless text.empty?
        rescue IndexError
        end
      end
    end
  end

  nil
end

#has_xmldecl?Boolean

has_xmldecl? returns true if there is an XML declaration on top level.

Returns:

  • (Boolean)


378
379
380
381
# File 'lib/htree/traverse.rb', line 378

# Reports whether any node in +children+ answers true to +xmldecl?+.
# Only the receiver's direct children are inspected.
#
# Returns true or false.
def has_xmldecl?
  children.any? { |node| node.xmldecl? }
end

#rootObject

root searches for the root element. If there is no element at the top level, it raises HTree::Error. If there are two or more elements at the top level, it also raises HTree::Error.

Raises:

  • (HTree::Error) — if the top level contains no element, or more than one element


369
370
371
372
373
374
375
# File 'lib/htree/traverse.rb', line 369

# Returns the single top-level element of the document.
#
# Only entries of +children+ that answer true to +elem?+ are considered.
#
# Raises HTree::Error ("no element") when there is no such child, and
# HTree::Error ("multiple top elements") when there is more than one.
def root
  top_elements = children.select { |node| node.elem? }
  case top_elements.length
  when 0
    raise HTree::Error, "no element"
  when 1
    top_elements.first
  else
    raise HTree::Error, "multiple top elements"
  end
end

#titleObject

title searches for the document's title and returns it as text. It returns nil if no title is found.

title searches the following information.

  • <title>…</title> in HTML

  • <title>…</title> in RSS

  • <title>…</title> in Atom



285
286
287
288
289
290
291
292
293
# File 'lib/htree/traverse.rb', line 285

# Returns the extracted text of the first element that +find_element+
# locates among the known title names (un-namespaced, XHTML, RSS 1.0,
# RSS 0.9 and both Atom namespaces). When +find_element+ finds nothing,
# its falsy result is returned unchanged.
def title
  title_names = [
    'title',
    '{http://www.w3.org/1999/xhtml}title',
    '{http://purl.org/rss/1.0/}title',
    '{http://my.netscape.com/rdf/simple/0.9/}title',
    '{http://www.w3.org/2005/Atom}title',
    '{http://purl.org/atom/ns#}title'
  ]
  found = find_element(*title_names)
  return found unless found

  found.extract_text
end

#traverse_all_element(&block) ⇒ Object



182
183
184
# File 'lib/htree/traverse.rb', line 182

# Forwards the given block to +traverse_all_element+ on every entry of
# +children+, in order. Returns whatever +children.each+ returns.
def traverse_all_element(&block)
  children.each do |child|
    child.traverse_all_element(&block)
  end
end

#traverse_some_element(name_set, &block) ⇒ Object



200
201
202
# File 'lib/htree/traverse.rb', line 200

# Forwards +name_set+ and the given block to +traverse_some_element+ on
# every entry of +children+, in order. Returns whatever +children.each+
# returns.
def traverse_some_element(name_set, &block)
  children.each do |child|
    child.traverse_some_element(name_set, &block)
  end
end