Class: Pagedump::Driver

Inherits:
Object
  • Object
show all
Defined in:
lib/pagedump/driver.rb

Overview

WARNING !! Not Thread-Safe

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Driver



11
12
13
14
# File 'lib/pagedump/driver.rb', line 11

# Builds a driver with empty link and data stores.
#
# @wlinks is the Hash written by #link (absolute URL => weight);
# @data is the Hash written by #data (key => Array of values).
def initialize
  @data = {}
  @wlinks = {}
end

Class Method Details

.inherited(subclass) ⇒ Object



7
8
9
# File 'lib/pagedump/driver.rb', line 7

# Class-level hook that Ruby invokes each time a class inherits from this one.
# Hands the new subclass to Pagedump.register_driver.
#
# @param subclass [Class] the class that has just been derived
# @return [Object] whatever Pagedump.register_driver returns
def self.inherited(subclass)
  Pagedump.register_driver(subclass)
end

Instance Method Details

#check(page) ⇒ Object



39
40
# File 'lib/pagedump/driver.rb', line 39

# No-op in this class: accepts the page fetched by #scrap and returns nil.
# Presumably meant to be overridden by concrete drivers - confirm against
# the subclasses.
#
# @param page [Object] the page object passed in by #scrap
# @return [nil]
def check(page); end

#data(key, value) ⇒ Object



16
17
18
19
# File 'lib/pagedump/driver.rb', line 16

# Appends +value+ to the list stored under +key+ in @data, creating the
# list the first time a key is seen.
#
# @param key [Object] hash key to file the value under
# @param value [Object] value to append
# @return [Array] the full list now stored under +key+
def data(key, value)
  (@data[key] ||= []) << value
end


#link(weight, href) ⇒ Object



21
22
23
24
# File 'lib/pagedump/driver.rb', line 21

# Resolves +href+ against this driver's #url and records the resulting
# absolute URL in @wlinks with the given weight. A later call for the same
# absolute URL overwrites the earlier weight.
#
# @param weight [Object] value stored for the link
# @param href [String] absolute or relative link target
# @return [Object] the weight that was stored
def link(weight, href)
  base_uri = URI.parse(url)
  target_uri = URI.parse(href)
  absolute = base_uri.merge(target_uri).to_s
  @wlinks[absolute] = weight
end

#name ⇒ Object



46
47
48
# File 'lib/pagedump/driver.rb', line 46

# Reports the driver's name, which is the name of the receiver's class.
#
# @return [String, nil] the class name (nil for an anonymous class)
def name
  driver_class = self.class
  driver_class.name
end

#scrap ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/pagedump/driver.rb', line 26

# Fetches the page at #url with a fresh Mechanize agent, passes it to #links
# and then #check, and returns what they collected.
#
# #links is not defined among the methods shown for this class; presumably
# concrete drivers supply it - confirm against the subclasses.
#
# NOTE(review): @wlinks is cleared on every call but @data is not, so values
# added through #data persist across successive calls on the same instance.
# Confirm whether that accumulation is intended.
#
# @return [OpenStruct] with +links+ (the @wlinks Hash) and +data+ (the @data Hash)
def scrap
  # Start each run with an empty link table.
  @wlinks = {}
  Pagedump.logger.info "Getting headlines for url #{url}"

  page = Mechanize.new.get(url)
  links(page)
  check(page)

  OpenStruct.new(links: @wlinks, data: @data)
end

#url ⇒ Object



42
43
44
# File 'lib/pagedump/driver.rb', line 42

# Returns the URL constant as resolved through the receiver's class.
#
# @return [Object] the value of the class's URL constant
# @raise [NameError] if the class has no URL constant
def url
  driver_class = self.class
  driver_class::URL
end