Class: DocmagoClient::HTMLResourceArchiver

Inherits:
Object
  • Object
show all
Defined in:
lib/docmago_client/html_resource_archiver.rb

Instance Method Summary collapse

Constructor Details

#initialize(html, base_path = '.') ⇒ HTMLResourceArchiver

Returns a new instance of HTMLResourceArchiver.



16
17
18
19
20
# File 'lib/docmago_client/html_resource_archiver.rb', line 16

# Stores the raw HTML and the base path, and parses the HTML with Nokogiri.
#
# @param html the HTML source; kept verbatim in @html and parsed into @doc
# @param base_path kept in @base_path (defaults to '.'); presumably the
#   directory that relative resource paths are resolved against — confirm
#   against the helper that reads @base_path
def initialize(html, base_path = '.')
  @html, @base_path = html, base_path
  @doc = Nokogiri::HTML(html)
end

Instance Method Details

#create_zip(file_path) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/docmago_client/html_resource_archiver.rb', line 22

# Writes a zip archive containing the HTML document and the resources it
# references.
#
# The HTML held in @html is stored as "document.html". Every URI returned by
# +fetch_uris+ is stripped of surrounding whitespace, parsed, and stored under
# an entry named after the MD5 hex digest of +normalize_uri(uri)+. Absolute
# URIs are downloaded; otherwise the path returned by +resolve_uri(uri)+ is
# read from disk if it exists. URIs that yield no data are skipped silently.
#
# NOTE(review): +fetch_uris+, +normalize_uri+ and +resolve_uri+ are defined
# outside this listing; their exact behaviour should be confirmed there.
#
# @param file_path [String] destination path of the zip archive
# @return [String] +file_path+, unchanged
def create_zip(file_path)
  Zip::File.open(file_path, Zip::File::CREATE) do |zipfile|
    zipfile.get_output_stream("document.html") { |f| f.write @html }

    fetch_uris.each do |raw_uri|
      uri = Addressable::URI.parse(raw_uri.to_s.strip)
      path_digest = Digest::MD5.hexdigest(normalize_uri(uri))

      # URI.open (open-uri) replaces the Kernel#open patch, which no longer
      # opens URLs on Ruby 3.0+. The block form closes the handle after reading.
      file_data = URI.open(uri.to_s, &:read) if uri.absolute?

      unless file_data
        local_path = resolve_uri(uri)
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        # binread keeps binary assets (images, fonts) byte-exact.
        file_data = File.binread(local_path) if File.exist?(local_path)
      end

      next unless file_data

      zipfile.get_output_stream(path_digest) { |f| f.write file_data }
    end
  end

  file_path
end