Class: Grabber::Page

Inherits:
Object
  • Object
show all
Includes:
Util
Defined in:
lib/grabber/page.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Util

#format_url, #strip_non_url_parts, #with_url_protocol

Constructor Details

#initialize(url) ⇒ Page

Returns a new instance of Page.



6
7
8
9
10
# File 'lib/grabber/page.rb', line 6

# Sets up a page for +url+ with empty asset and link collections.
#
# @param url the page address; stored untouched and handed to URI.parse by #uri
def initialize(url)
  @assets = []
  @url = url
  @links = []
end

Instance Attribute Details

Returns the value of attribute links.



4
5
6
# File 'lib/grabber/page.rb', line 4

# Reader for the @links instance variable.
#
# @return [Array] the collection that #initialize creates empty and that
#   #crawl appends collected href values to
def links
  @links
end

Instance Method Details

#basename ⇒ Object



38
39
40
41
42
43
44
# File 'lib/grabber/page.rb', line 38

# Local file name under which this page is saved.
#
# Takes the last segment of the URL path and appends ".html". When the path
# has no segment at all (nil, "" or only slashes such as "/") the name is
# "index.html". A bare "/" path previously raised NoMethodError because
# "/".split('/') is an empty array, so there was no last segment to append to.
#
# @return [String] file name ending in ".html"
def basename
  # Parse once; #uri re-parses the URL on every call.
  segment = uri.path.to_s.split('/').last
  return 'index.html' if segment.nil? || segment.empty?

  "#{segment}.html"
end

#content ⇒ Object



34
35
36
# File 'lib/grabber/page.rb', line 34

# Reads the document located at #uri and parses it with Nokogiri::HTML.
#
# Nothing is cached here: every call invokes uri.read again and builds a new
# parse result.
# NOTE(review): uri.read is presumably provided by open-uri and performs a
# network request on each call — confirm against the file's requires.
#
# @return the object returned by Nokogiri::HTML
def content
  Nokogiri::HTML(uri.read)
end

#crawl ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/grabber/page.rb', line 12

def crawl
  puts "Grabbing: #{uri.to_s}"

  content.search('img').each do |asset|
    @assets << asset['src']
  end

  content.search('a').each do |asset|
    location = asset['href']
    next if location.nil? || location == '' || location[/^#/]

    @links << location # if on same domain
  end

  @links.compact!
  @links.uniq! if @links
end

#download(directory) ⇒ Object



46
47
48
49
50
51
# File 'lib/grabber/page.rb', line 46

# Saves the page's remote content into +directory+, named by #basename.
#
# The body is fetched with uri.read, the same call #content uses, instead of
# passing the URI to Kernel#open, which stopped opening URLs in Ruby 3.0.
# The fetch finishes before the destination is opened, so a failed request
# does not leave an empty or truncated file behind.
#
# @param directory [String] directory to write into
# @return [Integer] number of bytes written
def download(directory)
  local_path = File.expand_path(File.join(directory, basename))
  body = uri.read
  File.binwrite(local_path, body)
end

#download_assets(directory) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/grabber/page.rb', line 53

# Downloads every collected asset into +directory+, each saved under the
# basename of its asset string.
#
# Each asset is fetched before its destination file is opened, so an HTTP
# failure is reported and skipped without leaving an empty file behind.
# Blank entries (nil or "") are ignored. The fetch goes through URI(...).read
# rather than Kernel#open, which no longer opens URLs on Ruby 3.0+ and would
# treat a non-URL string as a local path or command.
# NOTE(review): assumes format_url returns an absolute http(s) URL (String or
# URI) — confirm against Util#format_url.
#
# @param directory [String] directory to write into
# @return [Array] the @assets collection
def download_assets(directory)
  @assets.each do |asset|
    next if asset.nil? || asset.empty?

    remote_url = format_url(asset)
    begin
      data = URI(remote_url).read
    rescue OpenURI::HTTPError => e
      puts "Failed download for #{remote_url}: #{e.message}"
      next
    end

    local_path = File.expand_path(File.join(directory, File.basename(asset)))
    File.binwrite(local_path, data)
  end
end

#uri ⇒ Object



30
31
32
# File 'lib/grabber/page.rb', line 30

# Parses the stored URL into a URI object.
#
# The result is not memoized; every call runs URI.parse on @url again.
#
# @return [URI::Generic] the parse result for @url
# @raise [URI::InvalidURIError] if URI.parse rejects @url
def uri
  URI.parse(@url)
end