Class: WebCrawler::Application
- Defined in:
- lib/web_crawler/application.rb
Instance Method Summary
- #factory(pattern, *params) ⇒ Object
- #get(url, *urls) ⇒ Object
- #runner(name) ⇒ Object
- #show_urls(url, *urls) ⇒ Object
- #test ⇒ Object
- #version ⇒ Object
Methods inherited from CLI
Methods included from Thor::InheritedOptions
Methods included from Thor::Hooks
Instance Method Details
#factory(pattern, *params) ⇒ Object
67 68 69 70 71 72 73 74 75 76 |
# File 'lib/web_crawler/application.rb', line 67
#
# Expands +pattern+ into URLs through FactoryUrl, then either prints them or
# passes them on to #get.
#
# NOTE(review): this listing showed the option lookups as bare `[:list]` and
# `[:output]` array literals. Those are always truthy, which made the #get
# branch unreachable and the separator always a newline. The receiver is
# restored here as `options` (presumably the Thor options hash, cf. `@options`
# in #runner) - confirm against the real source file.
#
# SECURITY: every extra argument is run through `eval`, so arbitrary Ruby
# supplied by the caller is executed. Kept as-is because the accepted input
# grammar is not visible here; consider a restricted parser for untrusted input.
#
# @param pattern [Object] URL pattern, handed to FactoryUrl.new unchanged
# @param params [Array<String>] Ruby source snippets, each evaluated to a value
# @return [Object, nil] nil after printing, otherwise whatever #get returns
def factory(pattern, *params)
  params.map! { |param| eval(param) } # see SECURITY note above
  urls = FactoryUrl.new(pattern, params)

  # One URL per line in list mode, space-separated otherwise.
  sep = options[:list] ? "\n" : ' '

  if options[:output] || options[:list]
    # Swap the double quotes produced by #inspect for single quotes.
    puts urls.factory.map(&:inspect).join(sep).tr('"', "'")
  else
    get(*urls.factory)
  end
end
#get(url, *urls) ⇒ Object
43 44 45 46 47 48 |
# File 'lib/web_crawler/application.rb', line 43
#
# Builds a single BatchRequest from all given URLs and runs it.
#
# NOTE(review): the listing reads `BatchRequest.new(*urls, )`. The dangling
# comma is a no-op in Ruby, but it suggests a trailing argument was dropped
# when the page was extracted - check the real source file.
#
# @param url [Object] first URL (required)
# @param urls [Array] any further URLs
# @return [Object] whatever BatchRequest#process returns
def get(url, *urls)
  all_urls = [url, *urls]
  BatchRequest.new(*all_urls).process
end
#runner(name) ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/web_crawler/application.rb', line 30
#
# Requires the file for the named crawler class and invokes its `run` method.
#
# The current directory and every entry of `@options[:lib]` are prepended to
# the load path first, so the `require` below can find the file.
#
# @param name [String] class name; `underscore` gives the file to require and
#   `classify.constantize` gives the class (ActiveSupport-style helpers)
# @return [Object] whatever `klass.run` returns
def runner(name)
  $LOAD_PATH.unshift('./')
  Array.wrap(@options[:lib]).each { |dir| $LOAD_PATH.unshift(dir) }

  require name.underscore

  name.classify.constantize.run(allow_format(:json, :yaml))
end
#show_urls(url, *urls) ⇒ Object
55 56 57 58 59 60 |
# File 'lib/web_crawler/application.rb', line 55
#
# Runs a batch request for the given URLs, collects links from the result
# through Follower, and returns them grouped into rows of `cols` entries.
#
# NOTE(review): this listing showed `[:cols] ||= 1`, `['same-host']` and
# `in_groups_of([:cols], "")`. The first is a syntax error and the others are
# array literals where option lookups were clearly intended. The receiver is
# restored here as `options` (presumably the Thor options hash, cf. `@options`
# in #runner) - confirm against the real source file.
#
# NOTE(review): the listing also reads `BatchRequest.new(*urls, )`; the
# dangling comma suggests a trailing argument was dropped in extraction.
#
# @param url [Object] first URL (required)
# @param urls [Array] any further URLs
# @return [Array<Array>] collected entries in groups of `cols`, padded with ""
def show_urls(url, *urls)
  batch = BatchRequest.new(url, *urls)

  # Default to one column without writing into the options hash, which may be
  # frozen or shared with other commands.
  cols = options[:cols] || 1

  Follower.new(batch.process, same_host: options['same-host'])
          .collect
          .first
          .in_groups_of(cols, "")
end
#test ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/web_crawler/application.rb', line 9
#
# Smoke-test task: generates start URLs from a hard-coded pattern, requests
# them, collects URLs matching a vacancy-id pattern from the responses,
# requests those as well, and prints the inspected result.
#
# Progress is reported through `logger.info` along the way.
#
# @return [String] always the empty string
def test
  seed_urls = FactoryUrl.new(
    'http://www.superjob.ru/rabota/554/veb-programmist/?from=$1',
    [[140]]
  ).factory
  logger.info "start requests with #{seed_urls.join(' ')} in 4 processes"

  targets = BatchRequest.new(seed_urls).process
  logger.info "#{targets.size} targets collected"

  # Hosts are not restricted; only URLs matching the vacancy pattern are kept.
  vacancy_urls = Follower.new(targets, same_host: false).collect do |url|
    url =~ %r{vacancy/\?id=\d+}
  end
  logger.info "#{vacancy_urls.size} urls collected"
  logger.info "start requests with in 4 processes"

  results = BatchRequest.new(vacancy_urls).process
  puts results.inspect

  ""
end
#version ⇒ Object
79 80 81 |
# File 'lib/web_crawler/application.rb', line 79
#
# Returns the version constant of the library.
#
# @return [Object] the value of WebCrawler::VERSION::STRING
def version
  WebCrawler::VERSION::STRING
end