Class: WebStat::FetchAsWeb

Inherits:
Fetch
  • Object
show all
Defined in:
lib/web_stat/fetch/fetch_as_web.rb

Instance Attribute Summary

Attributes inherited from Fetch

#header, #html, #nokogiri, #status, #url, #userdic

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Fetch

#content, #eyecatch_image_path, #get_last_modified, #get_url, #save_local_path, #site_name, #stat, #title, #youtube_decscription

Constructor Details

#initialize(url) ⇒ FetchAsWeb

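Initializes the fetcher: validates the URL, fetches its content (HTML, or a PDF converted to minimal HTML), and parses it with Nokogiri.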

Parameters:

  • url (String)


7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/web_stat/fetch/fetch_as_web.rb', line 7

# Validates +url+, fetches its content and parses it with Nokogiri.
#
# When the resolved URL ends in ".pdf", the PDF is opened and a minimal HTML
# document is synthesized from the PDF title (falling back to the file's
# basename) and the text of its first page. Otherwise the HTML is obtained
# from get_url.
#
# @param url [String] URL to fetch
# @raise [WebStat::INVALID_URL] when FetchAsWeb.url_valid? rejects +url+
def initialize(url)
  raise WebStat::INVALID_URL, url unless FetchAsWeb.url_valid?(url)

  # original_url is defined elsewhere; its result is what actually gets fetched.
  @url = original_url(url)
  # \z anchors at the real end of the string; `$` would also match right
  # before an embedded newline.
  if @url.match?(/\.pdf\z/)
    title = nil
    body = nil
    URI.open(@url) do |io|
      reader = PDF::Reader.new(io)
      title = reader.info.key?(:Title) ? reader.info[:Title] : File.basename(@url, ".pdf")
      # A PDF without pages yields no first page; leave the body empty
      # instead of raising NoMethodError on nil.
      body = reader.pages.first&.text
    end
    @html = <<-"EOS"
      <html>
      <head>
        <title>#{title}</title>
      </head>
      <body>
        #{body}
      </body>
      </html>
    EOS
  else
    @html = get_url(@url)
  end
  @nokogiri = ::Nokogiri::HTML(@html)
end

Class Method Details

.url_valid?(url) ⇒ Boolean

Checks whether the given URL is a syntactically valid http(s) URL.

Returns:

  • (Boolean)


41
42
43
44
# File 'lib/web_stat/fetch/fetch_as_web.rb', line 41

# Checks whether +url+ looks like an absolute http(s) URL: an "http://" or
# "https://" scheme, a host made of letters, digits, hyphens and dots that
# ends in a dot followed by at least two letters, then any remaining text.
# Matching is case-insensitive.
#
# The pattern is anchored with \A and \z so the WHOLE string must match.
# `^` and `$` match at every line boundary, which would let a multi-line
# string such as "javascript:alert(1)\nhttp://example.com" pass validation.
#
# @param url [String] candidate URL
# @return [Boolean] true when the entire string matches the URL pattern
def url_valid?(url)
  %r{\Ahttps?://([a-z0-9][a-z0-9\-.]{0,61})\.([a-z]{2,})(.*)?\z}i.match?(url)
end