Class: Searcher::MultipleCrawler::Crawler

Inherits:
Object
  • Object
show all
Defined in:
lib/searcher/spider.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(user_agent = Global::UserAgent, redirect_limit = 1) ⇒ Crawler

Returns a new instance of Crawler.



10
11
12
13
14
# File 'lib/searcher/spider.rb', line 10

# Builds a crawler with the given identification and redirect settings.
#
# @param user_agent the user agent value stored on the crawler
#   (defaults to Global::UserAgent)
# @param redirect_limit the redirect limit stored on the crawler (defaults to 1)
def initialize(user_agent = Global::UserAgent, redirect_limit = 1)
  @user_agent, @redirect_limit = user_agent, redirect_limit
  # The timeout is fixed at construction time; it is not a constructor argument.
  @timeout = 20
end

Instance Attribute Details

#redirect_limit ⇒ Object

Returns the value of attribute redirect_limit.



16
17
18
# File 'lib/searcher/spider.rb', line 16

# Reader for the redirect limit stored on this crawler
# (assigned from the constructor's redirect_limit argument).
def redirect_limit; @redirect_limit; end

#timeout ⇒ Object

Returns the value of attribute timeout.



16
17
18
# File 'lib/searcher/spider.rb', line 16

# Reader for the timeout stored on this crawler (the constructor sets it to 20).
# NOTE(review): the unit is not visible here — presumably seconds; confirm.
def timeout; @timeout; end

#user_agent ⇒ Object

Returns the value of attribute user_agent.



16
17
18
# File 'lib/searcher/spider.rb', line 16

# Reader for the user agent stored on this crawler
# (assigned from the constructor's user_agent argument).
def user_agent; @user_agent; end

Instance Method Details

#fetch(website, selector = '') ⇒ Object



18
19
20
21
22
23
24
# File 'lib/searcher/spider.rb', line 18

# Fetches +website+ and parses the resulting HTML with Nokogiri.
#
# Writes one log line with the current process id and the target to stdout,
# then retrieves the response and its HTML through the Global helpers using
# this crawler's user agent and timeout.
#
# @param website the target passed to Global.get_whole_response
# @param selector accepted for interface compatibility but currently NOT
#   applied: the CSS filtering step below is commented out
# @return the result of Nokogiri::HTML for the fetched HTML
def fetch(website, selector = '')
  # `puts`, not `p`: `p` prints the inspected string, which wraps the message
  # in quotes and shows the newline as a literal "\n".
  puts "Pid:#{Process.pid}, fetch: #{website}"
  res = Global.get_whole_response(website, @user_agent, @timeout)
  html = Global.get_whole_html(res, @user_agent, @timeout)
  # NOTE(review): as written, the disabled line below would make the method
  # return nil for an empty selector; the whole document is always returned.
  #doc.css(selector)  if selector != ''
  Nokogiri::HTML(html)
end