Class: SitemapMaker::SiteTree::Tree

Inherits:
Object
  • Object
show all
Defined in:
lib/SitemapMaker/site_tree.rb

Instance Method Summary collapse

Constructor Details

#initialize(url) ⇒ Tree

Returns a new instance of Tree.



22
23
24
25
26
# File 'lib/SitemapMaker/site_tree.rb', line 22

# Builds a Tree for +url+ by downloading the page and parsing it as HTML.
#
# The download goes through the open-uri +#open+ method of the parsed URI
# rather than Kernel#open: Kernel#open spawns a subprocess when the string
# starts with "|", and it no longer opens URLs on Ruby 3.0+.
#
# @param url [String] absolute URL of the page to load
# @raise [URI::InvalidURIError] if +url+ is not a well-formed URI
# @raise [ArgumentError] if +url+ is not a remotely openable (e.g. http/https) URL
def initialize( url )
  uri = URI.parse( url )
  # Only URI classes extended by open-uri expose a public #open.
  raise ArgumentError, "unsupported URL: #{url.inspect}" unless uri.respond_to?(:open)

  # Block form closes the downloaded stream as soon as it has been parsed.
  @page = uri.open { |io| Nokogiri::HTML(io) }
  @url  = url
  @uri  = uri
end

Instance Method Details

Returns the links on the page that point to the page's own host.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/SitemapMaker/site_tree.rb', line 29

# Returns the de-duplicated links on this page whose href contains the
# page's own host.
#
# The page is fetched with Mechanize, its links are filtered by a host-based
# pattern, and each remaining link is passed through complete_url.
#
# @return [Array] one complete_url result per distinct link, in the order
#   links_with returned them
def own_links
  agent = Mechanize.new
  page  = agent.get(@url)

  # XXX: the pattern still needs tightening (try it at http://www.rubular.com).
  # The host is escaped so that its dots only match literal dots; unescaped,
  # "example.com" would also accept "exampleXcom".
  host            = Regexp.escape(@uri.host.to_s)
  own_link_regexp = %r!(#{host})[a-zA-Z0-9.?\/=%&一-龠亜-煕-]+$!

  # Links on the page whose href matches the host pattern.
  have_links = page.links_with( href: own_link_regexp )

  have_links.each_with_object([]) do |link, links|
    url = complete_url(page.uri, link.uri)
    links << url unless links.include?( url )
  end
end

#tree_all(url = @url, target_links = {}, deep_level = nil) ⇒ Object

Recursively collects the own links reachable from the given URL.



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/SitemapMaker/site_tree.rb', line 50

# Recursively walks the own links of +url+ and returns a hash keyed by link.
#
# Each newly seen link is stored with the result of get_meta, then its own
# links are walked with the depth reduced by one. If anything raises while a
# link is processed, that link's entry becomes an error hash instead.
# Every newly seen link is printed, whether or not it was processed.
#
# @param url [String] page whose links are walked (defaults to @url)
# @param target_links [Hash] entries collected so far; it is copied, not mutated
# @param deep_level [Integer, String, nil] remaining recursion depth; nil falls
#   back to SitemapMaker::Utils::DEFAULT_LEVEL, a String is converted with to_i
# @return [Hash] +target_links+ plus every entry collected during the walk
def tree_all(url = @url, target_links = {}, deep_level = nil )
  depth = deep_level || SitemapMaker::Utils::DEFAULT_LEVEL
  depth = depth.to_i if depth.is_a?(String)
  collected = target_links.dup

  Tree.new(url).own_links.each do |link|
    key = link.to_s
    # Only links without an entry yet are processed.
    next unless collected[key].blank?

    begin
      # Stop descending once the depth budget is used up.
      if depth > 0
        # Record the meta information, then fold in the link's descendants.
        collected[key] = get_meta(key)
        descendants = tree_all(key, collected, depth - 1)
        collected.merge!(descendants)
      end
    rescue => e
      # TODO: when the failure is a 404, state explicitly that the link is broken.
      collected[key] = { error: "リンク切れ" }
      puts e
    ensure
      puts key
    end
  end

  collected
end