Class: Gonzui::HTTPFetcher

Inherits:
AbstractFetcher show all
Includes:
TemporaryDirectoryUtil
Defined in:
lib/gonzui/fetcher.rb

Overview

FIXME: very ad hoc implementation

Instance Attribute Summary

Attributes included from TemporaryDirectoryUtil

#temporary_directory

Class Method Summary collapse

Instance Method Summary collapse

Methods included from TemporaryDirectoryUtil

#clean_temporary_directory, #prepare_temporary_directory, #set_temporary_directory

Methods inherited from AbstractFetcher

#add_finishing_proc, #exclude?, #finish, #package_name

Methods included from Util

assert, assert_equal, assert_equal_all, assert_non_nil, assert_not_reached, benchmark, command_exist?, commify, eprintf, format_bytes, program_name, protect_from_signals, require_command, set_verbosity, shell_escape, unix?, vprintf, windows?, wprintf

Constructor Details

#initialize(config, source_uri, options) ⇒ HTTPFetcher

Returns a new instance of HTTPFetcher.



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/gonzui/fetcher.rb', line 152

# Fetches +source_uri+ over HTTP and remembers the response body, its
# content type, and the base URI of the response.
#
# config::     forwarded to AbstractFetcher#initialize
# source_uri:: location to fetch; converted with to_s before opening
# options::    forwarded to AbstractFetcher#initialize
#
# Raises FetchFailed when open-uri reports an OpenURI::HTTPError.
# Other errors raised while opening the URI are not rescued here.
def initialize(config, source_uri, options)
  super(config, source_uri, options)
  begin
    # Open through the URI object instead of Kernel#open: Kernel#open no
    # longer handles URLs as of Ruby 3.0, and it would treat a string
    # starting with "|" as a command to run.
    URI.parse(source_uri.to_s).open {|f|
      @content = f.read
      @content_type = f.content_type
      @base_uri = f.base_uri
    }
  rescue OpenURI::HTTPError => e
    raise FetchFailed.new("#{source_uri.to_s}: #{e.message}")
  end

  # Reduce the base URI to its directory so relative links can be
  # appended to it:
  #   http://example.com/foo/index.html => http://example.com/foo/
  #   http://example.com/index.html     => http://example.com/
  #   http://example.com                => http://example.com/
  # Stripping the last segment (rather than File.dirname + "/") avoids
  # producing "//" for top-level files and "./" for an empty path.
  unless /\/$/.match(@base_uri.path)
    parent = @base_uri.path.sub(/[^\/]*\z/, "")
    @base_uri.path = parent.empty? ? "/" : parent
  end
  # NOTE(review): @config is presumably assigned by super -- confirm.
  set_temporary_directory(@config.temporary_directory)
end

Class Method Details

.scheme ⇒ Object



148
149
150
# File 'lib/gonzui/fetcher.rb', line 148

# Returns the URI scheme name associated with this fetcher class.
def self.scheme
  return "http"
end

Instance Method Details

#collect ⇒ Object



200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/gonzui/fetcher.rb', line 200

# Scans the fetched content for quoted href attributes and returns the
# paths of the links worth following, as an Array of Strings.
#
# A link is kept only when all of these hold:
# * it parses as a URI and has a path component,
# * it is relative (no scheme),
# * its path, with one trailing "/" removed, does not start with ".",
# * exclude? does not reject that path.
#
# Links that are not valid URIs are skipped, so one malformed href
# cannot abort the whole scan.
def collect
  relative_paths = []
  @content.scan(/href=(["'])(.*?)\1/i).each {|qmark, link|
    begin
      u = URI.parse(link)
    rescue URI::InvalidURIError
      next
    end
    path = u.path
    next if path.nil?
    # Work on a copy; chomp! would modify the URI's own path string.
    path = path.chomp("/")
    next unless u.relative?
    next if /^\./.match(path)
    next if exclude?(path)
    relative_paths.push(path)
  }
  return relative_paths
end

#fetch(relative_path) ⇒ Object



190
191
192
193
194
195
196
197
198
# File 'lib/gonzui/fetcher.rb', line 190

# Downloads the resource at +relative_path+ (resolved with restore_uri)
# and returns a Content built from its body and its last-modified time.
#
# Errors raised while opening the URI are not rescued here.
def fetch(relative_path)
  uri = restore_uri(relative_path)
  content = mtime = nil
  # Open through the URI object instead of Kernel#open: Kernel#open no
  # longer handles URLs as of Ruby 3.0.
  uri.open {|f|
    content = f.read
    mtime = f.last_modified
  }
  return Content.new(content, mtime)
end

#get_extractor ⇒ Object



181
182
183
184
185
186
187
188
# File 'lib/gonzui/fetcher.rb', line 181

# Saves the fetched content to a file in the temporary directory and
# returns an Extractor for that file.
#
# The file is named after the basename of the source URI's path. A
# finishing proc that calls clean_temporary_directory is registered.
def get_extractor
  prepare_temporary_directory
  tmp_name = File.join(self.temporary_directory,
                       File.basename(@source_uri.path))
  # Binary mode: this content is not HTML (see need_extraction?) and may
  # be an archive, which text mode would corrupt on Windows through
  # newline translation.
  File.open(tmp_name, "wb") {|f| f.write(@content) }
  add_finishing_proc(lambda { clean_temporary_directory })
  return Extractor.new(@config, tmp_name)
end

#need_extraction? ⇒ Boolean

Returns:

  • (Boolean)


177
178
179
# File 'lib/gonzui/fetcher.rb', line 177

# True unless the fetched resource was served with the content type
# "text/html".
def need_extraction?
  is_html_page = (@content_type == "text/html")
  !is_html_page
end

#restore_uri(relative_path) ⇒ Object



171
172
173
174
# File 'lib/gonzui/fetcher.rb', line 171

# Builds the absolute URI for +relative_path+ by appending it to the
# string form of the base URI, then parsing the result.
#
# Returns the parsed URI object.
def restore_uri(relative_path)
  URI.parse(@base_uri.to_s + relative_path)
end