Class: Macaron::Spawner

Inherits:
Object
  • Object
show all
Defined in:
lib/macaron/spawner.rb

Constant Summary collapse

# Default crawl settings. Caller-supplied options are merged over these in
# #initialize.
#
# NOTE: the constant name is misspelled ("DEFALUT"); it is kept as-is because
# #initialize references it by this name.
DEFALUT_OPTIONS =
{
  :nokogiri_timeout_seconds => 30,
  :thread_timeout_seconds => 40, # handed to Threadpool.new by #initialize
  :pages => 1000,                # #dig stops once the success count exceeds this
  :initial_workers => 1,
  :maximum_workers => 1,
  :in_site_crawling => true,
  :with_waltir => false,         # when truthy, #dig fetches HTML via get_html_via_waltir
  :debug => false                # when truthy, #dig prints a result summary when it finishes
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Spawner

Returns a new instance of Spawner.



25
26
27
28
29
30
31
32
# File 'lib/macaron/spawner.rb', line 25

# Builds a spawner: merges +options+ over DEFALUT_OPTIONS and creates the
# worker pool from the resulting settings.
#
# NOTE: the merged options are stored in the class variable @@options, so a
# later Spawner.new replaces the options seen through that variable.
#
# @param options [Hash] overrides for entries of DEFALUT_OPTIONS
def initialize(options = {})
  @@options = DEFALUT_OPTIONS.merge(options)

  initial, maximum, timeout = @@options.values_at(
    :initial_workers,
    :maximum_workers,
    :thread_timeout_seconds
  )
  @threadpool = Threadpool.new(initial, maximum, timeout)
end

Instance Method Details

#dig(url, init_depth = 3) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/macaron/spawner.rb', line 42

# Crawls starting from +url+.
#
# The start URL is put into the shared task map together with +init_depth+.
# The loop then repeatedly hands the task map's entries to the thread pool as
# Processor jobs (recording each URL in @@parsed_urls) until either
#   * no worker is busy and the task map is empty, or
#   * @@success_times exceeds the configured :pages limit.
#
# When the :with_waltir option is truthy the page HTML is fetched up front
# with get_html_via_waltir and passed to the Processor.
#
# @bot is closed in an +ensure+ so it is released even if the loop raises
# (presumably @bot is the browser opened by get_html_via_waltir - confirm).
#
# @param url start URL to crawl
# @param init_depth depth value stored with the start URL (default 3)
# @return [nil]
def dig(url, init_depth=3)
  @@task_map = @@task_map.put(url, init_depth)

  begin
    loop do
      # Block parameters are named distinctly so they do not shadow the +url+
      # argument of this method.
      @@task_map = @@task_map.remove {|task_url, task_depth|
        @@parsed_urls = @@parsed_urls.add(task_url)

        if @@options[:with_waltir]
          html = get_html_via_waltir(task_url)
          @threadpool.load(Processor.new(task_url, task_depth, html))
        else
          @threadpool.load(Processor.new(task_url, task_depth))
        end
      }

      # Finished: nothing queued and nothing in flight.
      break if @threadpool.busy_workers_count == 0 && @@task_map.empty?

      if @@success_times > @@options[:pages]
        print "Fetched pages exceeds the limit #{@@options[:pages]}\n"
        break
      end
    end
  ensure
    # Runs on normal exit and on exceptions alike.
    @bot.close unless @bot.nil?
  end

  puts "result: #{@@result.size}, #{@@result.keys}" if @@options[:debug]
end

#fail_times ⇒ Object



38
39
40
# File 'lib/macaron/spawner.rb', line 38

# Returns the current value of the class variable @@fail_times.
# NOTE(review): presumably a count of failed page fetches maintained
# elsewhere in the project - the code that updates it is not shown here.
def fail_times
  @@fail_times
end

#success_times ⇒ Object



34
35
36
# File 'lib/macaron/spawner.rb', line 34

# Returns the current value of the class variable @@success_times, which #dig
# compares against the :pages option to decide when to stop crawling.
# NOTE(review): the code that increments it is not shown here.
def success_times
  @@success_times
end