Class: Seep::Fetcher
- Inherits:
-
Object
- Object
- Seep::Fetcher
- Defined in:
- lib/seep/fetcher.rb
Constant Summary collapse
- AGENT =
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2'
Instance Attribute Summary collapse
-
#body ⇒ Object
readonly
Returns the value of attribute body.
-
#curb ⇒ Object
readonly
Returns the value of attribute curb.
-
#max_file_size ⇒ Object
Returns the value of attribute max_file_size.
-
#request_headers ⇒ Object
Returns the value of attribute request_headers.
-
#response_headers ⇒ Object
Returns the value of attribute response_headers.
-
#size ⇒ Object
readonly
Returns the value of attribute size.
-
#url ⇒ Object
Returns the value of attribute url.
Class Method Summary collapse
Instance Method Summary collapse
- #content_type ⇒ Object
- #dest_url ⇒ Object
- #doc? ⇒ Boolean
- #export(path) ⇒ Object
- #ext ⇒ Object
- #image? ⇒ Boolean
-
#initialize(url, options = {}) ⇒ Fetcher
constructor
A new instance of Fetcher.
- #inspect ⇒ Object
- #open(redirect = 0) ⇒ Object
- #register_on_body! ⇒ Object
- #register_on_header! ⇒ Object
- #to_doc ⇒ Object
- #to_image ⇒ Object
Constructor Details
#initialize(url, options = {}) ⇒ Fetcher
Returns a new instance of Fetcher.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/seep/fetcher.rb', line 8

# Builds a fetcher for +url+ and prepares the underlying Curl::Easy handle.
#
# @param url [String] address to fetch
# @param options [Hash] optional settings
# @option options [String] :user_agent User-Agent header (defaults to AGENT)
# @option options [Integer] :max_file_size size limit (defaults to 1_048_576)
# @option options [Integer] :max_redirects redirect limit (defaults to 5)
def initialize(url, options = {})
  self.url = url
  self.request_headers = { 'User-Agent' => options[:user_agent] || AGENT }
  self.response_headers = {}
  self.max_file_size = options[:max_file_size] || 1_048_576 # 1MB

  @curb = Curl::Easy.new(url)
  @curb.follow_location = true
  @curb.max_redirects = options[:max_redirects] || 5

  # Install the streaming callbacks that enforce max_file_size.
  register_on_header!
  register_on_body!
end
Instance Attribute Details
#body ⇒ Object (readonly)
Returns the value of attribute body.
4 5 6 |
# File 'lib/seep/fetcher.rb', line 4

# @return [Object] the value of the body attribute (@body)
def body
  @body
end
#curb ⇒ Object (readonly)
Returns the value of attribute curb.
4 5 6 |
# File 'lib/seep/fetcher.rb', line 4

# @return [Object] the value of the curb attribute (@curb)
def curb
  @curb
end
#max_file_size ⇒ Object
Returns the value of attribute max_file_size.
5 6 7 |
# File 'lib/seep/fetcher.rb', line 5

# @return [Object] the value of the max_file_size attribute (@max_file_size)
def max_file_size
  @max_file_size
end
#request_headers ⇒ Object
Returns the value of attribute request_headers.
6 7 8 |
# File 'lib/seep/fetcher.rb', line 6

# @return [Object] the value of the request_headers attribute (@request_headers)
def request_headers
  @request_headers
end
#response_headers ⇒ Object
Returns the value of attribute response_headers.
6 7 8 |
# File 'lib/seep/fetcher.rb', line 6

# @return [Object] the value of the response_headers attribute (@response_headers)
def response_headers
  @response_headers
end
#size ⇒ Object (readonly)
Returns the value of attribute size.
4 5 6 |
# File 'lib/seep/fetcher.rb', line 4

# @return [Object] the value of the size attribute (@size)
def size
  @size
end
#url ⇒ Object
Returns the value of attribute url.
5 6 7 |
# File 'lib/seep/fetcher.rb', line 5

# @return [Object] the value of the url attribute (@url)
def url
  @url
end
Class Method Details
.open(url, options = {}) ⇒ Object
76 77 78 |
# File 'lib/seep/fetcher.rb', line 76

# Convenience constructor: builds a fetcher and immediately performs the
# request.
#
# @param url [String] address to fetch
# @param options [Hash] forwarded unchanged to the constructor
# @return [Object] whatever the instance-level #open returns
def self.open(url, options = {})
  new(url, options).open
end
Instance Method Details
#content_type ⇒ Object
24 25 26 |
# File 'lib/seep/fetcher.rb', line 24

# Looks up the Content-Type response header.
#
# The canonical 'Content-Type' spelling is tried first; because HTTP header
# names are case-insensitive, any other capitalisation (for example the
# all-lowercase form) is accepted as a fallback.
#
# @return [String, nil] the stored header value, or nil when absent
def content_type
  exact = response_headers['Content-Type']
  return exact if exact

  _name, value = response_headers.find do |name, _|
    name.to_s.casecmp('Content-Type') == 0
  end
  value
end
#dest_url ⇒ Object
28 29 30 |
# File 'lib/seep/fetcher.rb', line 28

# @return [String] curb's last effective URL when it reports one, otherwise
#   the URL this fetcher was given
def dest_url
  curb.last_effective_url || url
end
#doc? ⇒ Boolean
68 69 70 |
# File 'lib/seep/fetcher.rb', line 68

# Tells whether the response is an HTML document.
#
# Only the media type is compared: parameters such as "; charset=utf-8" and
# letter case are ignored, so "text/html; charset=utf-8" counts as HTML.
#
# @return [Boolean]
def doc?
  media_type = content_type.to_s.split(';').first.to_s.strip.downcase
  media_type == 'text/html'
end
#export(path) ⇒ Object
43 44 45 46 47 |
# File 'lib/seep/fetcher.rb', line 43

# Writes the fetched body to +path+, replacing any existing file.
#
# The file is opened in binary mode so that non-text bodies (images, for
# example) are stored without newline or encoding translation.
#
# @param path [String] destination file path
# @return [Integer] number of bytes written
def export(path)
  File.open(path, 'wb') do |file|
    file.write(@body)
  end
end
#ext ⇒ Object
49 50 51 52 53 54 55 56 57 58 |
# File 'lib/seep/fetcher.rb', line 49

# Maps the response media type to a file extension.
#
# Media-type parameters (e.g. "; charset=utf-8") and letter case are ignored
# before matching.
#
# @return [String] the extension including the leading dot, or "" when the
#   type is unknown or missing
def ext
  media_type = content_type.to_s.split(';').first.to_s.strip.downcase

  case media_type
  when 'image/jpeg' then '.jpg'
  when 'image/png'  then '.png'
  when 'image/gif'  then '.gif'
  when 'text/html'  then '.html'
  when 'text/plain' then '.txt'
  else ''
  end
end
#image? ⇒ Boolean
60 61 62 |
# File 'lib/seep/fetcher.rb', line 60

# Tells whether the response is an image that #to_image considers valid.
#
# The match is parenthesised on purpose: `!!` binds tighter than `=~`, so
# without the parentheses the regexp would be applied to a boolean instead
# of the content type.
#
# @return [Boolean] false when the content type is missing or does not
#   start with "image"; otherwise the result of to_image.valid?
def image?
  !!(content_type =~ /\Aimage/) && to_image.valid?
end
#inspect ⇒ Object
39 40 41 |
# File 'lib/seep/fetcher.rb', line 39

# Short description for debugging: the content type (when known) followed
# by the destination URL.
#
# Missing parts are skipped rather than concatenated, so a nil dest_url
# does not raise.
#
# @return [String]
def inspect
  "#<Seep::Fetcher #{[content_type, dest_url].compact.join(' ')}>"
end
#open(redirect = 0) ⇒ Object
32 33 34 35 36 37 |
# File 'lib/seep/fetcher.rb', line 32

# Performs the request and collects the response through the registered
# curb callbacks.
#
# All per-request state (body, size and response headers) is reset first so
# that calling #open again never mixes in data from an earlier fetch.
#
# @param redirect [Integer] unused; accepted for backward compatibility
# @return [self]
def open(redirect = 0)
  @body = ""
  @size = 0
  # A fresh hash (rather than #clear) leaves any previously returned header
  # hash untouched for callers still holding it.
  self.response_headers = {}

  curb.headers = request_headers
  curb.perform
  self
end
#register_on_body! ⇒ Object
96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/seep/fetcher.rb', line 96

# Installs the curb body callback, which accumulates the response body and
# enforces max_file_size.
#
# For every chunk the callback appends to @body and updates @size with the
# accumulated size in bytes. While within the limit it returns the chunk's
# byte size; once the limit is exceeded it sets @size to -1 and returns -1.
# NOTE(review): the -1 return is presumably what makes curb abort the
# transfer — confirm against the curb documentation.
def register_on_body!
  curb.on_body do |chunk|
    # to_s tolerates a callback fired before #open has initialised @body.
    @body = @body.to_s + chunk
    # Sizes are measured in bytes, not characters.
    @size = @body.bytesize

    if @size > max_file_size
      @size = -1
    else
      chunk.bytesize
    end
  end
end
#register_on_header! ⇒ Object
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/seep/fetcher.rb', line 80

# Installs the curb header callback, which records response headers and
# rejects responses whose declared Content-Length exceeds max_file_size.
#
# Each "Name: value" line is stored in response_headers under the name
# exactly as received. Lines without a colon (the status line, the blank
# terminator) are not stored. The Content-Length name is matched
# case-insensitively because HTTP header names are case-insensitive.
#
# While within the limit the callback returns the header line's byte size;
# otherwise it sets @size to -1 and returns -1.
# NOTE(review): the -1 return is presumably what makes curb abort the
# transfer — confirm against the curb documentation.
def register_on_header!
  curb.on_header do |header|
    name, value = header.split(":", 2)

    if name && value
      name = name.strip
      value = value.strip
      @size = value.to_i if name.casecmp("Content-Length") == 0
      response_headers[name] = value
    end

    # to_i tolerates a callback fired before #open has initialised @size.
    if @size.to_i > max_file_size
      @size = -1
    else
      header.bytesize
    end
  end
end