Class: Anemone::PageStore

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/anemone/page_store.rb

Instance Method Summary collapse

Constructor Details

#initialize(storage = {}) ⇒ PageStore

Returns a new instance of PageStore.



9
10
11
# File 'lib/anemone/page_store.rb', line 9

# Creates a PageStore backed by +storage+.
#
# storage - the object that holds the pages; defaults to a new, empty Hash.
#           The other methods of this class call [], []=, delete, has_key?
#           and merge! on it, so any replacement must respond to those.
def initialize(storage = {})
  @storage = storage
end

Instance Method Details

#[](index) ⇒ Object

We typically index the hash with a URI, but convert it to a String for easier retrieval.



15
16
17
# File 'lib/anemone/page_store.rb', line 15

# Fetches the entry stored under +index+.
#
# index - usually a URI; it is converted with to_s first because entries
#         are keyed by their String form.
#
# Returns whatever the underlying storage returns for that key.
def [](index)
  key = index.to_s
  @storage[key]
end

#[]=(index, other) ⇒ Object



19
20
21
# File 'lib/anemone/page_store.rb', line 19

# Stores +other+ under the String form of +index+.
#
# index - the key (usually a URI); converted with to_s before storing.
# other - the value to store.
def []=(index, other)
  key = index.to_s
  @storage[key] = other
end

#delete(key) ⇒ Object



23
24
25
# File 'lib/anemone/page_store.rb', line 23

# Removes the entry stored under the String form of +key+.
#
# Returns whatever the underlying storage's delete returns.
def delete(key)
  stored_key = key.to_s
  @storage.delete(stored_key)
end

#each_value ⇒ Object



31
32
33
# File 'lib/anemone/page_store.rb', line 31

# Iterates over the store via #each and yields only the value of every
# key/value pair to the given block.
def each_value
  each do |_key, page|
    yield page
  end
end

#has_key?(key) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/anemone/page_store.rb', line 27

# Reports whether the storage holds an entry for the String form of +key+.
#
# Returns the result of the storage's own has_key? check.
def has_key?(key)
  lookup = key.to_s
  @storage.has_key?(lookup)
end

#has_page?(url) ⇒ Boolean

Does this PageStore contain the specified URL? HTTP and HTTPS versions of a URL are considered to be the same page.

Returns:

  • (Boolean)


51
52
53
54
55
56
57
58
59
# File 'lib/anemone/page_store.rb', line 51

# Does this PageStore contain +url+? The http and https variants of a URL
# count as the same page.
#
# url - an object responding to scheme, scheme= and dup (e.g. a URI).
#
# Returns true or false for http/https URLs, otherwise the result of has_key?.
def has_page?(url)
  web_schemes = %w(http https)
  return has_key?(url) unless web_schemes.include?(url.scheme)

  # Probe on a copy so the caller's URL keeps its original scheme.
  candidate = url.dup
  web_schemes.any? do |scheme|
    candidate.scheme = scheme
    has_key?(candidate)
  end
end

#pages_linking_to(urls) ⇒ Object

If given a single URL (as a String or URI), returns an Array of Pages which link to that URL. If given an Array of URLs, returns a Hash (URI => [Page, Page…]) of Pages linking to those URLs.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/anemone/page_store.rb', line 111

# Finds the stored Pages whose links include the given URL(s).
#
# urls - a single URL (String or URI) or an Array of them. Entries that are
#        not URIs are converted with URI(); entries that cannot be converted
#        are dropped. An Array argument is normalised in place, so after the
#        call it holds exactly the URI keys of the returned Hash (possibly
#        with duplicates).
#
# Returns an Array of Pages when given a single URL (empty if the URL could
# not be parsed), or a Hash (URI => [Page, Page...]) when given an Array.
def pages_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  urls.map! do |url|
    if url.is_a?(URI)
      url
    else
      begin
        URI(url)
      rescue StandardError
        # Best effort: an unparsable entry is skipped rather than aborting.
        nil
      end
    end
  end
  # Must be the destructive form; plain compact would leave a nil key behind.
  urls.compact!

  links = {}
  urls.each { |url| links[url] = [] }
  values.each do |page|
    # Walk the Hash rather than urls so a URL listed twice is counted once.
    links.each { |url, pages| pages << page if page.links.include?(url) }
  end

  single ? links.fetch(urls.first, []) : links
end

#shortest_paths!(root) ⇒ Object

Use a breadth-first search to calculate the single-source shortest paths from root to all pages in the PageStore



65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/anemone/page_store.rb', line 65

# Runs a breadth-first search from +root+ and records on every Page it
# reaches the number of links followed to get there (its +depth+), marking
# each such Page as visited.
#
# root - the starting URL; a String is converted with URI().
#
# Raises RuntimeError ("Root node not found") when has_key?(root) is false.
# Returns self.
def shortest_paths!(root)
  root = URI(root) if root.is_a?(String)
  raise "Root node not found" if !has_key?(root)

  q = Queue.new

  q.enq root
  root_page = self[root]
  root_page.depth = 0
  root_page.visited = true
  # Write the modified page back into the store.
  # NOTE(review): presumably required because some storage backends hand out
  # copies rather than live objects - confirm.
  self[root] = root_page
  while !q.empty?
    page = self[q.deq]
    page.links.each do |u|
      begin
        link = self[u]
        # Skip links that are not stored, were never fetched, or were already visited.
        next if link.nil? || !link.fetched? || link.visited

        # A redirect page is not queued itself; its target is handled below.
        q << u unless link.redirect?
        link.visited = true
        link.depth = page.depth + 1
        self[u] = link

        if link.redirect?
          # redo re-runs this block iteration without resetting u, so the
          # redirect target is processed as if it were a link of the current
          # page (and receives the same depth as the redirect itself).
          u = link.redirect_to
          redo
        end
      end
    end
  end

  self
end

#touch_key(key) ⇒ Object



41
42
43
# File 'lib/anemone/page_store.rb', line 41

# Stores a newly constructed Page for +key+, replacing whatever was stored
# under that key before.
#
# Returns the new Page.
def touch_key(key)
  placeholder = Page.new(key)
  self[key] = placeholder
end

#touch_keys(keys) ⇒ Object



45
46
47
# File 'lib/anemone/page_store.rb', line 45

# Stores a newly constructed Page for every entry of +keys+ with a single
# merge! into the storage.
#
# keys - an enumerable of keys; each is stored under its String form.
#
# Returns the result of the storage's merge!.
def touch_keys(keys)
  fresh_pages = keys.each_with_object({}) do |key, pages|
    pages[key.to_s] = Page.new(key)
  end
  @storage.merge!(fresh_pages)
end

#uniq! ⇒ Object

Removes all Pages from storage where redirect? is true



102
103
104
105
# File 'lib/anemone/page_store.rb', line 102

# Deletes every stored Page for which redirect? is true, using the page's
# own url as the key.
#
# Returns self.
def uniq!
  each_value do |page|
    next unless page.redirect?

    delete(page.url)
  end
  self
end

#urls_linking_to(urls) ⇒ Object

If given a single URL (as a String or URI), returns an Array of URLs which link to that URL. If given an Array of URLs, returns a Hash (URI => [URI, URI…]) of URLs linking to those URLs.



143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/anemone/page_store.rb', line 143

# Finds the URLs of the stored Pages that link to the given URL(s).
#
# urls - a single URL (String or URI) or an Array of them; passed on to
#        pages_linking_to wrapped in an Array when it is a single URL.
#
# Returns an Array of page URLs when given a single URL (empty when nothing
# was found), or a Hash (URI => [URI, URI...]) when given an Array.
def urls_linking_to(urls)
  single = !urls.is_a?(Array)
  urls = [urls] if single

  url_links = {}
  pages_linking_to(urls).each do |url, pages|
    url_links[url] = pages.map { |page| page.url }
  end

  # A single URL yields at most one entry; take it directly rather than
  # looking it up by key, since the Hash is keyed by URI while the argument
  # may have been a String.
  single ? (url_links.values.first || []) : url_links
end

#values ⇒ Object



35
36
37
38
39
# File 'lib/anemone/page_store.rb', line 35

# Collects the value of every key/value pair yielded by #each.
#
# Returns a new Array of the stored values, in iteration order.
def values
  pages = []
  each do |_url, page|
    pages.push(page)
  end
  pages
end