Class: Scrape::Application

Inherits:
Object
  • Object
show all
Defined in:
lib/scrape/application.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(scrapefile, options = {}, loader = Scrape::DefaultLoader) ⇒ Application

Returns a new instance of Application.



4
5
6
7
8
9
10
# File 'lib/scrape/application.rb', line 4

# Builds an application bound to a single scrapefile.
#
# @param scrapefile [String] path to the scrapefile; kept in expanded form
#   (see File.expand_path)
# @param options [Hash] settings for this application; a shallow copy is
#   stored, so the caller's hash is not shared
# @param loader [Class, Object] either a class, which is instantiated with
#   this application as its only argument, or any other object, which is
#   used as given
def initialize(scrapefile, options = {}, loader = Scrape::DefaultLoader)
  @scrapefile = File.expand_path(scrapefile)
  @options = options.dup
  # The loader is built before @sites exists, so its constructor sees the
  # application in exactly this partially initialised state.
  @loader =
    if loader.is_a?(Class)
      loader.new(self)
    else
      loader
    end
  @sites = {}
  reset
end

Instance Attribute Details

#history ⇒ Object (readonly)

Returns the value of attribute history.



2
3
4
# File 'lib/scrape/application.rb', line 2

# Reader for the +@history+ instance variable.
#
# @return [Array] the URLs that #run has already taken off the queue, in the
#   order they were processed; replaced by a new empty array on #reset
def history
  @history
end

#loader ⇒ Object (readonly)

Returns the value of attribute loader.



2
3
4
# File 'lib/scrape/application.rb', line 2

# Reader for the +@loader+ instance variable.
#
# @return [Object] the loader chosen in the constructor: an instance built
#   from the given class, or the object that was passed in directly.
#   #load_scrapefile calls +load+ on it.
def loader
  @loader
end

#options ⇒ Object (readonly)

Returns the value of attribute options.



2
3
4
# File 'lib/scrape/application.rb', line 2

# Reader for the +@options+ instance variable.
#
# @return [Object] the shallow copy (+dup+) of the options passed to the
#   constructor
def options
  @options
end

#scrapefile ⇒ Object (readonly)

Returns the value of attribute scrapefile.



2
3
4
# File 'lib/scrape/application.rb', line 2

# Reader for the +@scrapefile+ instance variable.
#
# @return [String] the scrapefile path given to the constructor, after
#   File.expand_path
def scrapefile
  @scrapefile
end

#sites ⇒ Object (readonly)

Returns the value of attribute sites.



2
3
4
# File 'lib/scrape/application.rb', line 2

# Reader for the +@sites+ instance variable.
#
# @return [Hash] the registered sites, keyed by each site's +to_s+ value;
#   starts out empty and is filled by #add_site. The hash itself is returned,
#   not a copy.
def sites
  @sites
end

Instance Method Details

#[](url) ⇒ Object



47
48
49
# File 'lib/scrape/application.rb', line 47

# Looks up the registered site responsible for a URL.
#
# @param url [Object] the URL handed to each site's +accept?+
# @return [Object, nil] the first registered site (in insertion order) whose
#   +accept?+ returns a truthy value, or nil when none does
def [](url)
  @sites.each_value.find { |site| site.accept?(url) }
end

#add_site(site, options = {}) ⇒ Object



51
52
53
54
55
56
57
58
# File 'lib/scrape/application.rb', line 51

# Registers a site identified by its URL string.
#
# @param site [String, Object] the site URL; anything that is not a String is
#   ignored
# @param options [Hash] options for the site; a shallow copy is handed to
#   Scrape::Site.new
# @return [Scrape::Site, nil] the newly built site, stored in the registry
#   under its +to_s+ value, or nil when +site+ is not a String
def add_site(site, options = {})
  return unless site.is_a?(String)

  registered = Scrape::Site.new(site, options.dup)
  @sites[registered.to_s] = registered
  registered
end

#enqueue(*urls) ⇒ Object



41
42
43
44
45
# File 'lib/scrape/application.rb', line 41

# Adds URLs to the pending queue, ignoring any that were already processed
# or are already waiting.
#
# @param urls [Array] URLs, or (nested) arrays of URLs; they are flattened
#   before being examined
# @return [Array] the flattened list of URLs that was examined, including
#   the ones that were skipped
def enqueue(*urls)
  urls.flatten.each do |candidate|
    next if @history.include?(candidate)
    next if @queue.include?(candidate)

    @queue.push(candidate)
  end
end

#load_scrapefile ⇒ Object



60
61
62
63
64
# File 'lib/scrape/application.rb', line 60

# Asks the loader to load the scrapefile, at most once per application.
#
# A failed load (an exception from the loader) leaves the application marked
# as not loaded, so a later call tries again.
#
# @return [true, nil] true when the scrapefile was loaded by this call, nil
#   when it had already been loaded
def load_scrapefile
  unless @scrapefile_loaded
    loader.load(scrapefile)
    @scrapefile_loaded = true
  end
end

#queue ⇒ Object



37
38
39
# File 'lib/scrape/application.rb', line 37

# Snapshot of the pending queue.
#
# @return [Array] a shallow copy (+dup+) of the internal queue; adding to or
#   removing from the returned array does not change the application's own
#   queue
def queue
  @queue.dup
end

#reset ⇒ Object



32
33
34
35
# File 'lib/scrape/application.rb', line 32

# Discards crawl progress by replacing the history and the queue with new
# empty arrays. Registered sites and the scrapefile-loaded flag are not
# touched here.
#
# @return [Array] the new, empty queue (the value of the last assignment)
def reset
  @history = []
  @queue = []
end

#run ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/scrape/application.rb', line 12

# Processes the queue until it is exhausted.
#
# The scrapefile is loaded first. If nothing is queued, the queue is seeded
# with the +to_s+ value of every registered site. Each URL is then shifted
# off the queue, recorded in the history, and passed to the site that
# accepts it; URLs returned by the site's +parse+ are enqueued. Progress is
# reported through Scrape.logger.
#
# @return [nil]
def run
  load_scrapefile

  @queue = sites.values.map(&:to_s) if @queue.empty?

  # A nil or false entry ends the loop, exactly like an empty queue.
  while (url = @queue.shift)
    # Recorded before parsing so the URL is never enqueued again.
    @history.push(url)

    site = self[url]
    unless site
      Scrape.logger.info "No rules defined for #{url}"
      next
    end

    urls = site.parse(url)
    if urls
      enqueue(*urls)
      Scrape.logger.info "Parsed #{url}, found #{urls.length} urls."
    else
      Scrape.logger.info "Parsed #{url}."
    end
  end
end