Class: WebStat::FetchAsWeb
- Defined in:
- lib/web_stat/fetch/fetch_as_web.rb
Instance Attribute Summary
Attributes inherited from Fetch
#header, #html, #nokogiri, #status, #url, #userdic
Class Method Summary collapse
-
.url_valid?(url) ⇒ Boolean
Validates that the given URL is a well-formed http(s) URL.
Instance Method Summary collapse
-
#initialize(url) ⇒ FetchAsWeb
constructor
Initializes the instance by fetching and parsing the given URL.
Methods inherited from Fetch
#content, #eyecatch_image_path, #get_last_modified, #get_url, #save_local_path, #site_name, #stat, #title, #youtube_decscription
Constructor Details
#initialize(url) ⇒ FetchAsWeb
Initializes the instance by fetching and parsing the given URL.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/web_stat/fetch/fetch_as_web.rb', line 7
#
# Build a fetcher for +url+ and parse the fetched document into @nokogiri.
#
# URLs ending in ".pdf" are opened with URI.open and read with PDF::Reader;
# the PDF title (or the file's basename when the info dictionary has no
# :Title) and the text of the first page are wrapped in a minimal HTML
# skeleton so that PDFs go through the same Nokogiri parsing as web pages.
# Every other URL is fetched with the inherited #get_url.
#
# @param url [String] the URL to fetch; it is passed through original_url
#   before use (NOTE(review): original_url is defined elsewhere — presumably
#   it normalizes or resolves the URL; confirm).
# @raise [WebStat::INVALID_URL] when FetchAsWeb.url_valid?(url) is false
def initialize(url)
  raise WebStat::INVALID_URL, url unless FetchAsWeb.url_valid?(url)

  @url = original_url(url)
  # \z (not $) so only a URL that really ends in ".pdf" takes the PDF path.
  if @url.match?(/\.pdf\z/)
    # stdlib; loaded here because only the PDF path needs CGI.escapeHTML.
    require "cgi"

    title = nil
    body = nil
    URI.open(@url) do |io|
      reader = PDF::Reader.new(io)
      title = reader.info.key?(:Title) ? reader.info[:Title] : File.basename(@url, ".pdf")
      # A PDF without pages has no first page; fall back to an empty body
      # instead of raising NoMethodError on nil.
      body = reader.pages.first&.text
    end
    # The PDF text is plain text, not markup: escape it so characters such
    # as "<" and "&" survive HTML parsing instead of being read as tags.
    @html = <<-EOS
    <html>
      <head>
        <title>#{CGI.escapeHTML(title.to_s)}</title>
      </head>
      <body>
        #{CGI.escapeHTML(body.to_s)}
      </body>
    </html>
    EOS
  else
    @html = get_url(@url)
  end
  @nokogiri = ::Nokogiri::HTML(@html)
end
Class Method Details
.url_valid?(url) ⇒ Boolean
Validates that the given URL is a well-formed http(s) URL.
41 42 43 44 |
# File 'lib/web_stat/fetch/fetch_as_web.rb', line 41
#
# Check whether +url+ looks like an absolute http(s) URL whose host has at
# least one dot and an alphabetic top-level label of two or more letters.
# Matching is case-insensitive.
#
# The pattern is anchored with \A and \z rather than ^ and $: the line
# anchors would accept any multi-line string in which just one line is a
# valid URL (e.g. "javascript:...\nhttp://example.com").
#
# @param url [String, nil] candidate URL
# @return [Boolean] true when the whole string matches; false otherwise
#   (including for nil)
def url_valid?(url)
  # A regexp literal without interpolation is compiled once, not per call.
  %r{\Ahttps?://([a-z0-9][a-z0-9\-.]{0,61})\.([a-z]{2,})(.*)?\z}i.match?(url)
end