Class: WebPageArchiver::DataUriHtmlGenerator
- Inherits:
-
Object
- Object
- WebPageArchiver::DataUriHtmlGenerator
- Includes:
- GeneratorHelpers
- Defined in:
- lib/web_page_archiver.rb
Overview
Generates self-contained, data-URI-based HTML.
Instance Attribute Summary collapse
-
#conf ⇒ Object
Returns the value of attribute conf.
Class Method Summary collapse
-
.generate(filename_or_uri) ⇒ String
Generates a self-contained, data-URI-based HTML document without requiring the caller to instantiate a DataUriHtmlGenerator object.
Instance Method Summary collapse
-
#convert(filename_or_uri) ⇒ String
convert object at uri to self-contained text-file.
-
#set_contents ⇒ Object
replaces content-placeholders with actual content.
Methods included from GeneratorHelpers
#content_type, #download_finished?, #initialize, #join_uri, #start_download_thread
Instance Attribute Details
#conf ⇒ Object
Returns the value of attribute conf.
202 203 204 |
# File 'lib/web_page_archiver.rb', line 202
# Reader for the +conf+ attribute.
#
# @return [Object] the current value of the @conf instance variable
def conf
  @conf
end
Class Method Details
.generate(filename_or_uri) ⇒ String
Generates a self-contained, data-URI-based HTML document without requiring the caller to instantiate a DataUriHtmlGenerator object.
mhtml = WebPageArchiver::DataUriHtmlGenerator.generate("https://rubygems.org/")
open("output.html", "w+") { |f| f.write mhtml }
211 212 213 214 |
# File 'lib/web_page_archiver.rb', line 211
# Builds a self-contained, data-URI-based HTML document in a single call, so
# the caller does not have to instantiate a DataUriHtmlGenerator first.
#
# @param filename_or_uri location of the HTML document; handed unchanged to #convert
# @return [String] whatever #convert returns for a freshly created generator
def self.generate(filename_or_uri)
  DataUriHtmlGenerator.new.convert(filename_or_uri)
end
Instance Method Details
#convert(filename_or_uri) ⇒ String
convert object at uri to self-contained text-file
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
# File 'lib/web_page_archiver.rb', line 220
# Converts the HTML document at +filename_or_uri+ into one self-contained
# HTML string.
#
# Each <img src>, <link rel=stylesheet href> and <script src> reference is
# resolved against +filename_or_uri+ with join_uri, registered in @contents
# under the MD5 hex digest of the resolved URI, and rewritten to a
# "cid:<digest>" placeholder. set_contents is then called before the
# document is serialized.
#
# @param filename_or_uri location of the HTML document; passed to open and join_uri
# @return [String] the parsed document serialized with to_s
def convert(filename_or_uri)
  # NOTE(review): Kernel#open only fetches URIs when open-uri has patched it,
  # and the opened handle is never closed here — confirm the supported Ruby
  # versions before changing this call.
  @parser = Nokogiri::HTML(open(filename_or_uri))

  # [CSS selector, attribute holding the external reference], kept in the
  # original processing order: images, stylesheets, scripts.
  [
    ['img', 'src'],
    ['link[rel=stylesheet]', 'href'],
    ['script', 'src']
  ].each do |selector, attribute_name|
    @parser.search(selector).each do |node|
      reference = node.attr(attribute_name)
      # Inline scripts, and any element lacking the attribute, have nothing to embed.
      next unless reference

      # Always stringify join_uri's result so Digest::MD5.hexdigest receives a
      # String for every element type, not only for images.
      uri = join_uri(filename_or_uri, reference).to_s
      uid = Digest::MD5.hexdigest(uri)
      # NOTE(review): elements resolving to the same URI share one uid, so a
      # later element replaces the earlier entry — confirm whether duplicate
      # references need separate handling.
      @contents[uid] = { :uri => uri, :parser_ref => node, :attribute_name => attribute_name }
      node.set_attribute(attribute_name, "cid:#{uid}")
    end
  end

  set_contents
  @parser.to_s
end
#set_contents ⇒ Object
replaces content-placeholders with actual content
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/web_page_archiver.rb', line 251
# Replaces every placeholder registered in @contents with an inline data: URI.
#
# All keys of @contents are pushed onto @queue, start_download_thread is
# invoked, and every thread in @threads is joined. Afterwards each entry's
# :body is Base64-encoded and written, together with its :content_type, into
# the attribute named by :attribute_name on the node stored as :parser_ref.
#
# @return [Object] the result of iterating @contents (the hash itself for a plain Hash)
def set_contents
  # Queue every registered resource for download.
  @contents.each_key { |uid| @queue.push uid }

  start_download_thread
  # Block until every download thread has finished.
  @threads.each(&:join)

  @contents.each_value do |entry|
    # strict_encode64 emits the Base64 text without line feeds, so no
    # post-processing of the encoded string is needed.
    encoded_body = Base64.strict_encode64(entry[:body])
    # Named mime_type so the local does not shadow the #content_type helper.
    mime_type = entry[:content_type]
    entry[:parser_ref].set_attribute(entry[:attribute_name], "data:#{mime_type};base64,#{encoded_body}")
  end
end