Class: BrokenLinkFinder::LinkManager

Inherits:
Object
  • Object
show all
Defined in:
lib/broken_link_finder/link_manager.rb

Overview

Class responsible for handling the link collection logic.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(sort) ⇒ LinkManager

Returns a new LinkManager instance with empty link collections.



28
29
30
31
32
33
34
35
36
# File 'lib/broken_link_finder/link_manager.rb', line 28

# Builds a LinkManager whose link collections all start out empty.
#
# @param sort [Symbol] how links are grouped; must be :page or :link.
# @raise [RuntimeError] if sort is neither :page nor :link.
def initialize(sort)
  valid_sort = %i[page link].include?(sort)
  raise "Sort by either :page or :link, not #{sort}" unless valid_sort

  @lock = Mutex.new # Guards the link collections in the append/remove methods.
  @sort = sort

  empty # Creates every link collection in its empty state.
end

Instance Attribute Details

Used to prevent crawling a broken link twice.



19
20
21
# File 'lib/broken_link_finder/link_manager.rb', line 19

# Reader for the collection of every broken link recorded so far; used to
# prevent crawling a broken link twice.
#
# @return [Set] the broken links (initialised as an empty Set by #empty).
def all_broken_links
  @all_broken_links
end

Used for building crawl statistics.



25
26
27
# File 'lib/broken_link_finder/link_manager.rb', line 25

# Reader for the collection of every ignored link recorded so far; its size
# feeds the crawl statistics built by #tally.
#
# @return [Set] the ignored links (initialised as an empty Set by #empty).
def all_ignored_links
  @all_ignored_links
end

Used to prevent crawling an intact link twice.



22
23
24
# File 'lib/broken_link_finder/link_manager.rb', line 22

# Reader for the collection of every intact link recorded so far; used to
# prevent crawling an intact link twice.
#
# @return [Set] the intact links (initialised as an empty Set by #empty).
def all_intact_links
  @all_intact_links
end

Used to map a link (as is) to its absolute (crawlable) form.



16
17
18
# File 'lib/broken_link_finder/link_manager.rb', line 16

# Reader for the map of a broken link (as is) to its absolute (crawlable)
# form. Entries are written by #append_broken_link when called with
# map: true, using link.make_absolute(doc) as the value.
#
# @return [Hash] link => absolute link (initialised as {} by #empty).
def broken_link_map
  @broken_link_map
end

Used for mapping pages to broken links.



7
8
9
# File 'lib/broken_link_finder/link_manager.rb', line 7

# Reader for the grouped broken links. Each key maps to an Array of values
# appended by #append_broken_link.
# NOTE(review): the key/value pairing comes from get_key_value, which is not
# shown here; presumably page => [links] or link => [pages] depending on the
# sort mode. Confirm against that helper.
#
# @return [Hash] key => Array of values (initialised as {} by #empty).
def broken_links
  @broken_links
end

#crawl_stats ⇒ Object (readonly)

Used to record crawl statistics e.g. duration etc.



13
14
15
# File 'lib/broken_link_finder/link_manager.rb', line 13

# Reader for the crawl statistics, e.g. duration. The Hash is populated by
# #tally with Symbol keys such as :url, :num_pages, :num_links and :duration.
#
# @return [Hash] the recorded statistics (initialised as {} by #empty).
def crawl_stats
  @crawl_stats
end

Used for mapping pages to ignored links.



10
11
12
# File 'lib/broken_link_finder/link_manager.rb', line 10

# Reader for the grouped ignored links. Each key maps to an Array of values
# appended by #append_ignored_link.
# NOTE(review): the key/value pairing comes from get_key_value, which is not
# shown here; presumably it depends on the sort mode. Confirm against that
# helper.
#
# @return [Hash] key => Array of values (initialised as {} by #empty).
def ignored_links
  @ignored_links
end

Instance Method Details

Append key => [value] to the broken link collections. If map: true, then the link will also be recorded in @broken_link_map.



51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/broken_link_finder/link_manager.rb', line 51

# Records a broken link in the broken link collections, under the lock.
#
# The key/value pair stored in @broken_links is produced by
# get_key_value(doc.url, link). The link is also added to @all_broken_links
# and, when map is true, @broken_link_map[link] is set to
# link.make_absolute(doc).
#
# @param doc [#url] the document the link was found on.
# @param link [#make_absolute] the broken link (make_absolute is only
#   called when map is true).
# @param map [Boolean] whether to also record the link in @broken_link_map.
def append_broken_link(doc, link, map: true)
  group, entry = get_key_value(doc.url, link)

  @lock.synchronize do
    # Create the bucket on first use, then append to it.
    bucket = (@broken_links[group] ||= [])
    bucket << entry

    @all_broken_links << link

    @broken_link_map[link] = link.make_absolute(doc) if map
  end
end

Append key => [value] to the ignored link collections.



80
81
82
83
84
85
86
87
88
89
# File 'lib/broken_link_finder/link_manager.rb', line 80

# Records an ignored link in the ignored link collections, under the lock.
#
# The key/value pair stored in @ignored_links is produced by
# get_key_value(url, link); the link is also added to @all_ignored_links.
#
# @param url [Object] the URL of the page the link was found on.
# @param link [Object] the ignored link.
def append_ignored_link(url, link)
  group, entry = get_key_value(url, link)

  @lock.synchronize do
    # Create the bucket on first use, then append to it.
    bucket = (@ignored_links[group] ||= [])
    bucket << entry

    @all_ignored_links << link
  end
end

Append link to @all_intact_links.



92
93
94
# File 'lib/broken_link_finder/link_manager.rb', line 92

# Adds link to @all_intact_links while holding the lock.
#
# @param link [Object] the intact link to record.
def append_intact_link(link)
  @lock.synchronize do
    @all_intact_links << link
  end
end

#empty ⇒ Object

Initialise/empty the link collection objects.



39
40
41
42
43
44
45
46
47
# File 'lib/broken_link_finder/link_manager.rb', line 39

# Initialises (or resets) every link collection to a fresh, empty object.
# Any previously held collections are discarded.
def empty
  # Sets of individual links.
  @all_broken_links  = Set.new
  @all_intact_links  = Set.new

  # Hashes: grouped links, link => absolute link map, and crawl statistics.
  @broken_link_map   = {}
  @crawl_stats       = {}
  @ignored_links     = {}
  @broken_links      = {}

  # Assigned last so the method's value is unchanged.
  @all_ignored_links = Set.new
end

Remove the broken link from the necessary collections.



65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/broken_link_finder/link_manager.rb', line 65

# Reclassifies link from broken to intact, under the lock.
#
# When @sort is :page the link is deleted from every value Array in
# @broken_links, and any key whose Array is then empty is dropped. Otherwise
# the link is deleted as a key of @broken_links. In both cases the link is
# removed from @all_broken_links and added to @all_intact_links.
#
# @param link [Object] the link to remove from the broken collections.
def remove_broken_link(link)
  @lock.synchronize do
    case @sort
    when :page
      # Strip the link from each Array and drop keys left with nothing.
      @broken_links.delete_if do |_page, page_links|
        page_links.delete(link)
        page_links.empty?
      end
    else
      @broken_links.delete(link)
    end

    @all_broken_links.delete(link)
    @all_intact_links << link
  end
end

#sort ⇒ Object

Sorts the link collections’ keys and values alphabetically.



97
98
99
100
101
102
103
104
105
106
# File 'lib/broken_link_finder/link_manager.rb', line 97

# Sorts the keys and values of @broken_links and @ignored_links.
#
# For each collection: duplicate values are removed from every Array in
# place, the Hash is rebuilt with its keys in ascending order, and every
# value Array is then sorted in place. The value Arrays keep their identity;
# only the enclosing Hashes are replaced.
#
# @return [Hash] the sorted @ignored_links.
def sort
  # each_value is used for the in-place mutations so that no throwaway
  # Arrays are built (uniq! returns nil when nothing changed, so its result
  # is of no use anyway).
  @broken_links.each_value(&:uniq!)
  @ignored_links.each_value(&:uniq!)

  @broken_links  = @broken_links.sort_by(&:first).to_h
  @ignored_links = @ignored_links.sort_by(&:first).to_h

  @broken_links.each_value(&:sort!)
  @ignored_links.each_value(&:sort!)
end

#tally(url:, pages_crawled:, start:) ⇒ Object

Tallies up various statistics about the crawl and its links.



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/broken_link_finder/link_manager.rb', line 109

# Records statistics about the crawl and its links in @crawl_stats.
#
# Link counts are taken from the sizes of @all_broken_links,
# @all_intact_links and @all_ignored_links; :num_links is their sum.
#
# @param url [Object] the crawled URL, stored under :url.
# @param pages_crawled [#size] the crawled pages, stored under
#   :pages_crawled, with their count under :num_pages.
# @param start [Time] when the crawl started; :duration is Time.now - start.
def tally(url:, pages_crawled:, start:)
  broken_count  = @all_broken_links.size
  intact_count  = @all_intact_links.size
  ignored_count = @all_ignored_links.size

  @crawl_stats.update(
    url: url,
    pages_crawled: pages_crawled,
    num_pages: pages_crawled.size,
    num_links: broken_count + intact_count + ignored_count,
    num_broken_links: broken_count,
    num_intact_links: intact_count,
    num_ignored_links: ignored_count
  )

  # Computed last, as close to the end of the crawl as possible.
  @crawl_stats[:duration] = Time.now - start
end