Class: Gonzui::HTTPFetcher
Overview
FIXME: very ad hoc implementation
Instance Attribute Summary
#temporary_directory
Class Method Summary
collapse
Instance Method Summary
collapse
#clean_temporary_directory, #prepare_temporary_directory, #set_temporary_directory
#add_finishing_proc, #exclude?, #finish, #package_name
Methods included from Util
assert, assert_equal, assert_equal_all, assert_non_nil, assert_not_reached, benchmark, command_exist?, commify, eprintf, format_bytes, program_name, protect_from_signals, require_command, set_verbosity, shell_escape, unix?, vprintf, windows?, wprintf
Constructor Details
#initialize(config, source_uri, options) ⇒ HTTPFetcher
Returns a new instance of HTTPFetcher.
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
|
# File 'lib/gonzui/fetcher.rb', line 152
# Downloads +source_uri+ and keeps the response for later use.
#
# config, source_uri and options are forwarded unchanged to the
# superclass constructor. The response body is stored in @content, its
# media type in @content_type and the URI reported by open-uri in
# @base_uri. @base_uri is then reduced to the enclosing "directory" so
# that it always ends with "/" and relative paths can be appended to it.
#
# Raises FetchFailed when open-uri reports an OpenURI::HTTPError.
def initialize(config, source_uri, options)
  super(config, source_uri, options)
  begin
    # Call open on the URI object (provided by open-uri) rather than
    # Kernel#open: Kernel#open stopped handling URLs in Ruby 3.0.
    URI.parse(source_uri.to_s).open {|f|
      @content = f.read
      @content_type = f.content_type
      @base_uri = f.base_uri
    }
  rescue OpenURI::HTTPError => e
    raise FetchFailed.new("#{source_uri.to_s}: #{e.message}")
  end

  path = @base_uri.path
  unless /\/$/.match(path)
    # An empty path (e.g. "http://host") must become "/", not "./",
    # because URI#path= rejects a relative path on an http URI.
    directory = path.empty? ? "/" : File.dirname(path)
    # File.dirname("/index.html") is already "/"; do not produce "//".
    directory += "/" unless /\/$/.match(directory)
    @base_uri.path = directory
  end

  set_temporary_directory(@config.temporary_directory)
end
|
Class Method Details
.scheme ⇒ Object
148
149
150
|
# File 'lib/gonzui/fetcher.rb', line 148
# URI scheme served by this fetcher.
#
# Returns the String "http".
def self.scheme
  return "http"
end
|
Instance Method Details
#collect ⇒ Object
200
201
202
203
204
205
206
207
208
209
210
211
212
|
# File 'lib/gonzui/fetcher.rb', line 200
# Scans the fetched page (@content) for quoted +href+ attributes and
# returns the relative paths found in them.
#
# A link is skipped when
# * it is not a parsable URI (one broken link must not abort the scan),
# * it has no path component,
# * it is not a relative reference,
# * its path is empty once a trailing "/" is removed (links such as
#   "#top", "?C=N" or "/" only point back at the page itself),
# * its path starts with ".", or
# * exclude? rejects the path.
#
# Returns an Array of String paths without a trailing "/".
def collect
  relative_paths = []
  @content.scan(/href=(["'])(.*?)\1/i) {|_quote, link|
    begin
      u = URI.parse(link)
    rescue URI::InvalidURIError
      next
    end
    next if u.path.nil?
    # Non-destructive chomp: do not modify the string owned by the URI.
    path = u.path.chomp("/")
    next unless u.relative?
    next if path.empty?
    next if /^\./.match(path)
    next if exclude?(path)
    relative_paths.push(path)
  }
  return relative_paths
end
|
#fetch(relative_path) ⇒ Object
190
191
192
193
194
195
196
197
198
|
# File 'lib/gonzui/fetcher.rb', line 190
# Downloads the resource that +relative_path+ refers to.
#
# relative_path is turned into a full URI by restore_uri. The response
# body and its Last-Modified time (as reported by open-uri) are wrapped
# in a Content object, which is returned.
def fetch(relative_path)
  uri = restore_uri(relative_path)
  content = mtime = nil
  # Call open on the URI object (provided by open-uri) rather than
  # Kernel#open: Kernel#open stopped handling URLs in Ruby 3.0.
  uri.open {|f|
    content = f.read
    mtime = f.last_modified
  }
  return Content.new(content, mtime)
end
|
181
182
183
184
185
186
187
188
|
# File 'lib/gonzui/fetcher.rb', line 181
# NOTE(review): the method name appears to have been lost after `def`
# when this page was extracted. The listing gutter shows eight source
# lines (181-188), which matches this block only if the line below `def`
# is the first body statement rather than the method name — confirm
# against lib/gonzui/fetcher.rb before relying on this definition.
#
# Saves the downloaded body (@content) to a file in the temporary
# directory, named after the basename of @source_uri's path, registers
# clean_temporary_directory as a finishing proc, and returns an
# Extractor built from @config and that file name.
def
prepare_temporary_directory
tmp_name = File.join(self.temporary_directory,
File.basename(@source_uri.path))
# NOTE(review): the file is opened in text mode ("w"); presumably the
# body may be a binary archive — verify whether "wb" is needed.
File.open(tmp_name, "w") {|f| f.write(@content) }
add_finishing_proc(lambda { clean_temporary_directory })
return Extractor.new(@config, tmp_name)
end
|
177
178
179
|
# File 'lib/gonzui/fetcher.rb', line 177
# NOTE(review): the method name is missing after `def` (apparently lost
# when this page was extracted) — restore it from
# lib/gonzui/fetcher.rb, line 177.
#
# Evaluates to true when the content type of the fetched resource is
# anything other than "text/html".
def
@content_type != "text/html"
end
|
#restore_uri(relative_path) ⇒ Object
171
172
173
174
|
# File 'lib/gonzui/fetcher.rb', line 171
# Rebuilds the full URI of an entry from its path relative to @base_uri.
#
# relative_path is appended to the string form of @base_uri and the
# result is parsed again. Returns the parsed URI object.
def restore_uri(relative_path)
  base = @base_uri.to_s
  return URI.parse(base + relative_path)
end
|