Class: Grabber::Page
- Inherits:
- Object
- Includes:
- Util
- Defined in:
- lib/grabber/page.rb
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods included from Util
#format_url, #strip_non_url_parts, #with_url_protocol
Constructor Details
#initialize(url) ⇒ Page
Returns a new instance of Page.
6
7
8
9
10
|
# File 'lib/grabber/page.rb', line 6
# Sets up a page for the given address. Nothing is fetched here; the
# asset and link collections start out empty.
#
# @param url [String] address of the page
def initialize(url)
  @links = []
  @assets = []
  @url = url
end
|
Instance Attribute Details
#links ⇒ Object
Returns the value of attribute links.
4
5
6
|
# File 'lib/grabber/page.rb', line 4
# Reader for the links collected so far.
#
# @return [Array] the same array object held in @links (not a copy)
def links
  @links
end
|
Instance Method Details
#basename ⇒ Object
38
39
40
41
42
43
44
|
# File 'lib/grabber/page.rb', line 38
# File name under which the page is stored locally.
#
# The name is the last segment of the URL path with ".html" appended.
# When the path has no segments at all (nil, "" or just "/"), the name
# falls back to "index.html".
#
# @return [String] local file name for this page
def basename
  # Parse once; String#split drops trailing empty fields, so "/" yields
  # an empty array and `last` is nil.
  last_segment = uri.path.to_s.split('/').last
  return "index.html" if last_segment.nil?

  "#{last_segment}.html"
end
|
#content ⇒ Object
34
35
36
|
# File 'lib/grabber/page.rb', line 34
# Reads whatever #uri points at and hands the result to Nokogiri's HTML
# parser. Nothing is cached: every call reads and parses again.
#
# @return [Object] the value returned by Nokogiri::HTML
def content
  markup = uri.read
  Nokogiri::HTML(markup)
end
|
#crawl ⇒ Object
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
# File 'lib/grabber/page.rb', line 12
# Fetches the page and records the image sources and link targets in it.
#
# Every non-empty `src` of an `img` element is appended to @assets.
# Every `href` of an `a` element is appended to @links, except missing
# or empty values and in-page anchors (those starting with "#").
# @links is then stripped of nils and duplicates.
#
# @return [Array] the collected links (@links)
def crawl
  puts "Grabbing: #{uri}"

  # Fetch and parse once: each call to #content reads the page again.
  document = content

  document.search('img').each do |image|
    source = image['src']
    # An <img> without src would put nil into @assets and later break
    # File.basename in #download_assets.
    @assets << source unless source.nil? || source.empty?
  end

  document.search('a').each do |anchor|
    location = anchor['href']
    next if location.nil? || location.empty? || location.start_with?('#')

    @links << location
  end

  @links.compact!
  # uniq! returns nil when nothing was removed, so return @links explicitly.
  @links.uniq!
  @links
end
|
#download(directory) ⇒ Object
46
47
48
49
50
51
|
# File 'lib/grabber/page.rb', line 46
# Saves the page body into +directory+ under the name given by #basename.
#
# The body is read through `uri.read`, the same call #content uses,
# rather than `Kernel#open`, which no longer accepts URLs on Ruby 3.0+.
# The body is fetched before the file is opened, so a failed fetch does
# not leave an empty file behind.
#
# @param directory [String] directory that receives the file
# @return [Integer] number of bytes written
def download(directory)
  local_path = File.expand_path(File.join(directory, basename))
  File.binwrite(local_path, uri.read)
end
|
#download_assets(directory) ⇒ Object
53
54
55
56
57
58
59
60
61
62
63
64
|
# File 'lib/grabber/page.rb', line 53
# Downloads every entry of @assets into +directory+, naming each file
# after the basename of the asset reference.
#
# An HTTP failure for one asset is reported on stdout and that asset is
# skipped; the remaining assets are still processed.
#
# @param directory [String] directory that receives the files
# @return [Array] the @assets array
def download_assets(directory)
  @assets.each do |asset|
    remote = format_url(asset)

    begin
      # Go through URI rather than Kernel#open: asset references come from
      # fetched HTML, and Kernel#open treats "|cmd" as a command and plain
      # paths as local files (and rejects URLs on Ruby 3.0+).
      data = URI(remote).read
    rescue OpenURI::HTTPError => e
      puts "Failed download for #{remote}: #{e.message}"
      next
    end

    # Write only after a successful fetch so failures leave no empty file.
    local_path = File.expand_path(File.join(directory, File.basename(asset)))
    File.binwrite(local_path, data)
  end
end
|
#uri ⇒ Object
30
31
32
|
# File 'lib/grabber/page.rb', line 30
# Parses the stored address (@url) with URI.parse.
# A new object is built on every call; the result is not cached.
#
# @return [URI::Generic] parsed form of @url
def uri
  address = @url
  URI.parse(address)
end
|