Class: Searcher::MultipleCrawler::Crawler
- Inherits: Object
- Inheritance hierarchy: Object → Searcher::MultipleCrawler::Crawler
- Defined in:
- lib/searcher/spider.rb
Instance Attribute Summary
-
#redirect_limit ⇒ Object
Returns the value of attribute redirect_limit.
-
#timeout ⇒ Object
Returns the value of attribute timeout.
-
#user_agent ⇒ Object
Returns the value of attribute user_agent.
Instance Method Summary
- #fetch(website, selector = '') ⇒ Object
-
#initialize(user_agent = Global::UserAgent, redirect_limit = 1) ⇒ Crawler
constructor
A new instance of Crawler.
Constructor Details
#initialize(user_agent = Global::UserAgent, redirect_limit = 1) ⇒ Crawler
Returns a new instance of Crawler.
10 11 12 13 14 |
# File 'lib/searcher/spider.rb', line 10
# Sets up a new Crawler by storing its configuration in instance variables.
#
# @param user_agent [Object] stored as @user_agent; defaults to Global::UserAgent
# @param redirect_limit [Object] stored as @redirect_limit; defaults to 1
def initialize(user_agent = Global::UserAgent, redirect_limit = 1)
  @user_agent, @redirect_limit = user_agent, redirect_limit
  # Fixed value, not configurable through the constructor.
  # NOTE(review): presumably seconds — confirm against the Global helpers that consume it.
  @timeout = 20
end
Instance Attribute Details
#redirect_limit ⇒ Object
Returns the value of attribute redirect_limit.
16 17 18 |
# File 'lib/searcher/spider.rb', line 16
# Reader for the redirect limit stored on this crawler.
#
# @return [Object] the current value of @redirect_limit (nil when it was never assigned)
def redirect_limit
  @redirect_limit
end
#timeout ⇒ Object
Returns the value of attribute timeout.
16 17 18 |
# File 'lib/searcher/spider.rb', line 16
# Reader for the timeout stored on this crawler.
#
# @return [Object] the current value of @timeout (nil when it was never assigned)
def timeout
  @timeout
end
#user_agent ⇒ Object
Returns the value of attribute user_agent.
16 17 18 |
# File 'lib/searcher/spider.rb', line 16
# Reader for the user agent stored on this crawler.
#
# @return [Object] the current value of @user_agent (nil when it was never assigned)
def user_agent
  @user_agent
end
Instance Method Details
#fetch(website, selector = '') ⇒ Object
18 19 20 21 22 23 24 |
# File 'lib/searcher/spider.rb', line 18
# Retrieves +website+ through the Global helpers and parses the resulting
# HTML with Nokogiri.
#
# @param website [Object] target handed to Global.get_whole_response
# @param selector [String] accepted for compatibility but currently unused;
#   the CSS filtering step below is disabled
# @return [Object] the document produced by Nokogiri::HTML
def fetch(website, selector = '')
  # `puts` emits the message text itself; `p` would print the inspected
  # string (surrounding quotes and a literal "\n"). The string already ends
  # with a newline, so `puts` does not append a second one.
  puts "Pid:#{Process.pid}, fetch: #{website}\n"
  res = Global.get_whole_response(website, @user_agent, @timeout)
  html = Global.get_whole_html(res, @user_agent, @timeout)
  doc = Nokogiri::HTML(html)
  # Selector filtering is disabled, so the whole document is returned:
  # doc.css(selector) if selector != ''
  doc
end