Class: Crawlette::Page

Inherits:
Object
  • Object
show all
Defined in:
lib/crawlette/page.rb

Constant Summary collapse

# Matches strings that begin with the "mailto:" scheme.
# Anchored with \A rather than ^ because ^ matches at the start of every line,
# which would also accept a multi-line string whose later line begins with
# "mailto:".
MAILTO_REGEX = /\Amailto:/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, uri) ⇒ Page

Returns a new instance of Page.



8
9
10
11
# File 'lib/crawlette/page.rb', line 8

# Stores the given markup and URI on the new instance without transforming
# either value.
#
# @param html [Object] kept as-is in @html
# @param uri [Object] kept as-is in @uri
def initialize(html, uri)
  @html, @uri = html, uri
end

Instance Attribute Details

#uri ⇒ Object (readonly)

Returns the value of attribute uri.



6
7
8
# File 'lib/crawlette/page.rb', line 6

# Reader for the +uri+ attribute.
#
# @return [Object] the current value of @uri
def uri
  @uri
end

Instance Method Details

#assets ⇒ Object



17
18
19
20
21
22
23
24
25
# File 'lib/crawlette/page.rb', line 17

# Collects the URLs of assets referenced by the page: the +src+ of every
# element that has one, the +href+ of stylesheet <link> elements, and the
# +content+ of Open Graph image <meta> elements. The collected list is passed
# to sanitize_urls with external links allowed, and the result is memoized
# in @assets.
#
# @return [Object] whatever sanitize_urls returns for the collected URLs
def assets
  @assets ||= begin
    urls = document.css('[src]').map { |node| node["src"] }
    urls += document.css('link[rel="stylesheet"][href]').map { |node| node["href"] }

    # The prefix selector also matches og:image:width, og:image:height,
    # og:image:type and og:image:alt, whose content is not a URL, so keep only
    # the URL-valued names (og:image, og:image:url, og:image:secure_url).
    # NOTE(review): Open Graph markup normally uses the `property` attribute;
    # only `name` is matched here, as before -- confirm whether that is intended.
    og_image_url = /\Aog:image(?::(?:secure_)?url)?\z/
    og_nodes = document.css('meta[name^="og:image"]').select { |node| og_image_url.match?(node["name"]) }

    # A matching <meta> without a content attribute yields nil; drop those.
    urls += og_nodes.map { |node| node["content"] }.compact

    sanitize_urls(urls, external_links: true)
  end
end


13
14
15
# File 'lib/crawlette/page.rb', line 13

# Gathers the +href+ of every <a> element that has one and passes the list
# to sanitize_urls with its default options. The result is memoized in @links.
#
# @return [Object] whatever sanitize_urls returns for the collected hrefs
def links
  return @links if @links

  hrefs = document.css('a[href]').map { |anchor| anchor["href"] }
  @links = sanitize_urls(hrefs)
end