Class: Macaron::Spawner

Inherits:
Object
  • Object
show all
Defined in:
lib/macaron/spawner.rb

Instance Method Summary

Constructor Details

#initialize(url, options) ⇒ Spawner

Returns a new instance of Spawner.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/macaron/spawner.rb', line 7

# Seeds the crawl with +url+ and then blocks, dispatching one crawler job per
# queued URL to a thread pool until no task is awaiting completion.
#
# The instance registers itself as an observer on every job; the observer
# callback (#update) is what feeds newly discovered links back into @tasks
# and maintains @awaiting_counter.
#
# NOTE(review): job_timeout and task_timeout are not defined in this block;
# they are presumably helpers on this class or derived from options — confirm.
#
# @param url [String] the first URL to crawl
# @param options [Hash] crawl options; only :with_watir is read here
def initialize(url, options)
  @options = options

  # Threadpool(initial workers, max workers, job timeout)
  threadpool = Threadpool.new(10, 10, job_timeout)

  # URLs waiting to be handed to a crawler job
  @tasks = Queue.new
  @tasks << url

  # URLs already seen, used to prevent loop crawling
  @parsed_urls = [url]

  # number of tasks queued or in flight; the seed URL counts as one
  @awaiting_counter = 1

  # optional shared webdriver, only created when requested
  bot = Watir::Browser.new if @options[:with_watir]

  begin
    loop do
      break if @awaiting_counter == 0

      begin
        # Bounded wait so the loop re-checks the exit condition instead of
        # blocking forever on an empty queue.
        next_url = Timeout.timeout(task_timeout) { @tasks.shift }
      rescue Timeout::Error
        # Nothing arrived in time; only the timeout is swallowed so that
        # unexpected errors are not silently retried forever.
        next
      end

      job = Macaron::Crawler.new(next_url, bot)
      job.add_observer(self)

      threadpool.load(job)
    end
  ensure
    # Release the browser even when the dispatch loop raises.
    bot.close if bot
  end
end

Instance Method Details

#update(links) ⇒ Object



44
45
46
47
48
49
50
51
52
53
# File 'lib/macaron/spawner.rb', line 44

# Observer callback: records the links reported by a finished job.
#
# Every link not yet present in @parsed_urls is remembered, queued on @tasks
# and counted as one more awaiting task. The finished job itself is then
# removed from @awaiting_counter.
#
# New links are counted *before* the finished job is released so that the
# counter never reads 0 while unqueued work remains; the constructor's
# dispatch loop exits as soon as it sees @awaiting_counter == 0.
# NOTE(review): this assumes the callback may run on a worker thread while
# the dispatch loop is polling — confirm against Macaron::Crawler.
#
# @param links [#each] links discovered by the finished job
# @return [Object] whatever +links.each+ returns
def update(links)
  links.each do |link|
    next if @parsed_urls.include?(link)

    # Remember each URL once only; re-appending known links would grow the
    # list with duplicates and slow down every later include? scan.
    @parsed_urls << link
    @awaiting_counter += 1
    @tasks << link
  end
ensure
  # Always release the finished job, even if iterating the links raised.
  @awaiting_counter -= 1
end