Class: Scrape::Application
- Inherits: Object
- Defined in: lib/scrape/application.rb
Instance Attribute Summary
- #history ⇒ Object (readonly): Returns the value of attribute history.
- #loader ⇒ Object (readonly): Returns the value of attribute loader.
- #options ⇒ Object (readonly): Returns the value of attribute options.
- #scrapefile ⇒ Object (readonly): Returns the value of attribute scrapefile.
- #sites ⇒ Object (readonly): Returns the value of attribute sites.
Instance Method Summary
- #[](url) ⇒ Object
- #add_site(site, options = {}) ⇒ Object
- #enqueue(*urls) ⇒ Object
- #initialize(scrapefile, options = {}, loader = Scrape::DefaultLoader) ⇒ Application (constructor): A new instance of Application.
- #load_scrapefile ⇒ Object
- #queue ⇒ Object
- #reset ⇒ Object
- #run ⇒ Object
Constructor Details
#initialize(scrapefile, options = {}, loader = Scrape::DefaultLoader) ⇒ Application
Returns a new instance of Application.
# File 'lib/scrape/application.rb', line 4

def initialize scrapefile, options = {}, loader = Scrape::DefaultLoader
  @scrapefile = File.expand_path scrapefile
  @options = options.dup
  @loader = loader.class == Class ? loader.new(self) : loader
  @sites = {}
  reset
end
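As an illustration only (the Scrapefile path and the option key below are assumptions, not part of the gem's documented API), constructing an application might look like:

require 'scrape'

# Hypothetical Scrapefile path and option; Application simply copies the hash.
app = Scrape::Application.new "Scrapefile", :depth => 1

app.options  # => {:depth => 1}  (a defensive copy of the hash passed in)
app.sites    # => {}             (empty until the Scrapefile is loaded)
app.queue    # => []             (reset is called at the end of initialize)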
Instance Attribute Details
#history ⇒ Object (readonly)
Returns the value of attribute history.
# File 'lib/scrape/application.rb', line 2

def history
  @history
end
#loader ⇒ Object (readonly)
Returns the value of attribute loader.
# File 'lib/scrape/application.rb', line 2

def loader
  @loader
end
#options ⇒ Object (readonly)
Returns the value of attribute options.
# File 'lib/scrape/application.rb', line 2

def options
  @options
end
#scrapefile ⇒ Object (readonly)
Returns the value of attribute scrapefile.
# File 'lib/scrape/application.rb', line 2

def scrapefile
  @scrapefile
end
#sites ⇒ Object (readonly)
Returns the value of attribute sites.
# File 'lib/scrape/application.rb', line 2

def sites
  @sites
end
Instance Method Details
#[](url) ⇒ Object
# File 'lib/scrape/application.rb', line 47

def [] url
  @sites.values.detect{|site| site.accept? url }
end
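A short sketch of looking a site up by URL; it assumes Scrape::Site#accept? matches URLs beneath the site's root, which is behaviour of Scrape::Site rather than of this class:

app = Scrape::Application.new "Scrapefile"   # hypothetical Scrapefile
site = app.add_site "http://example.com"

app["http://example.com/products"]   # => site, the first registered site accepting the URL
app["http://elsewhere.example.org"]  # => nil when no registered site accepts the URL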
#add_site(site, options = {}) ⇒ Object
# File 'lib/scrape/application.rb', line 51

def add_site site, options = {}
  case site
  when String
    site = Scrape::Site.new site, options.dup
    @sites.update site.to_s => site
    site
  end
end
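When given a String, the URL is wrapped in a Scrape::Site, registered under its canonical form (site.to_s), and returned; any other value falls through the case statement and nil is returned. A brief sketch:

site = app.add_site "http://example.com"
site.class       # => Scrape::Site
app.sites.keys   # => [site.to_s], e.g. ["http://example.com"]

app.add_site 42  # => nil, only String URLs are handled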
#enqueue(*urls) ⇒ Object
# File 'lib/scrape/application.rb', line 41

def enqueue *urls
  urls.flatten.each do |url|
    @queue << url unless @history.include?(url) || @queue.include?(url)
  end
end
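URLs are flattened and appended only when they appear in neither the history nor the current queue, so duplicate discoveries are silently dropped. For example:

app.enqueue "http://example.com/a", "http://example.com/b"
app.enqueue ["http://example.com/a", "http://example.com/c"]   # arrays are flattened, /a is skipped

app.queue  # => ["http://example.com/a", "http://example.com/b", "http://example.com/c"]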
#load_scrapefile ⇒ Object
# File 'lib/scrape/application.rb', line 60

def load_scrapefile
  return if @scrapefile_loaded
  loader.load(scrapefile)
  @scrapefile_loaded = true
end
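The @scrapefile_loaded flag makes loading idempotent: the loader's load method runs at most once per application, and #run calls this method before crawling. For example:

app.load_scrapefile  # evaluates the Scrapefile via loader.load(scrapefile)
app.load_scrapefile  # no-op; the guard clause returns immediately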
#queue ⇒ Object
# File 'lib/scrape/application.rb', line 37

def queue
  @queue.dup
end
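Because #queue returns a duplicate, mutating the returned array never changes the crawl queue itself; use #enqueue to add URLs. For example:

pending = app.queue
pending << "http://example.com/extra"          # modifies only the copy

app.queue.include? "http://example.com/extra"  # => false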
#reset ⇒ Object
# File 'lib/scrape/application.rb', line 32

def reset
  @history = []
  @queue = []
end
#run ⇒ Object
# File 'lib/scrape/application.rb', line 12

def run
  load_scrapefile
  @queue = sites.values.map{|site| site.to_s } if @queue.empty?

  while url = @queue.shift
    @history << url
    if site = self[url]
      if urls = site.parse(url)
        enqueue *urls
        Scrape.logger.info "Parsed #{url}, found #{urls.length} urls."
      else
        Scrape.logger.info "Parsed #{url}."
      end
    else
      Scrape.logger.info "No rules defined for #{url}"
    end
  end
end
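Putting it together: #run loads the Scrapefile, seeds the queue with each site's root URL when the queue is empty, then shifts URLs off the queue, records them in the history, and hands each one to the matching site's parse, enqueuing whatever URLs it returns. A minimal sketch (the Scrapefile path is an assumption):

app = Scrape::Application.new "Scrapefile"
app.run

app.history  # => every URL shifted off the queue during the crawl
app.queue    # => [] once the crawl has finished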