Module: MLS::CLI::Documents

Defined in:
lib/mls/cli/documents.rb

Class Method Summary collapse

Class Method Details

.backup(dir) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/mls/cli/documents.rb', line 31

# Downloads the file behind each Document into +dir+, verifies it, and stores
# it at the path produced by +partition+ for the file's SHA-256 digest.
#
# A +.last_sync+ file inside +dir+ holds the +created_at+ of the last document
# that was downloaded. When it exists, only documents created at or after that
# timestamp are visited; otherwise every document is visited. Documents whose
# file is already present under +dir+ are skipped.
#
# For every downloaded file the MD5 is compared with the stored +md5+, and the
# record is updated with the freshly calculated md5/sha1/sha256 digests.
#
# @param dir [String] backup directory (it must already exist, because the
#   +.last_sync+ marker is written directly inside it)
# @return [Object] whatever +find_each+ returns; callers should not rely on it
# @raise [RuntimeError] if a document names a provider that is not handled,
#   if the required storage engine has no configuration in MLS::CLI.options,
#   or if the downloaded file's MD5 differs from the stored one
def self.backup(dir)
  last_sync_file = File.join(dir, '.last_sync')
  storage_engines = {}
  storage_engines[:s3] = StandardStorage::S3.new(MLS::CLI.options[:s3]) if MLS::CLI.options[:s3]
  storage_engines[:b2] = StandardStorage::B2.new(MLS::CLI.options[:b2]) if MLS::CLI.options[:b2]

  # Resume from the last recorded timestamp when a previous run left one.
  query = if File.exist?(last_sync_file)
    from_timestamp = Time.iso8601(File.read(last_sync_file))
    Document.where(Document.arel_table['created_at'].gteq(from_timestamp))
  else
    Document.all
  end

  query.find_each do |document|
    # File.exist? (not the removed File.exists?) so this runs on Ruby >= 3.2.
    if document.sha256 && File.exist?(File.join(dir, partition(document.sha256)))
      puts "Downloaded #{document.id}"
      next
    end

    # Documents without a provider are treated as S3 objects addressed by
    # hash_key; B2 is checked before S3 when a provider list names both.
    engine_name, key =
      if document.provider.nil?
        [:s3, 'hash_key']
      elsif document.provider.include?('b2/sha256')
        [:b2, 'sha256']
      elsif document.provider.include?('s3/hash_key')
        [:s3, 'hash_key']
      else
        raise 'unknown storage engine'
      end

    # Fail with a clear message instead of a NoMethodError on nil when the
    # engine this document needs was not configured.
    storage_engine = storage_engines[engine_name]
    raise "#{engine_name} storage engine is not configured" unless storage_engine

    print "Downloading #{document.id.to_s.ljust(7)} "
    storage_engine.copy_to_tempfile(document.public_send(key)) do |file|
      digests = calculate_digest(file)
      # Use the digest just calculated: document.sha256 may still be nil here
      # (that is why this document was not skipped above).
      relative_path = partition(digests[:sha256])
      destination = File.join(dir, relative_path)
      puts relative_path

      raise 'MD5 does not match' if digests[:md5] != document.md5
      # NOTE(review): provider is always rewritten to ['s3/hash_key'], even
      # when the file was fetched from B2 - confirm this is intended.
      document.update!(digests.merge({provider: ['s3/hash_key']}))

      FileUtils.mkdir_p(File.dirname(destination))
      FileUtils.mv(file.path, destination)
    end

    # Record progress so that an interrupted run can resume from here.
    File.write(last_sync_file, document.created_at.iso8601(6))
  end
end

.calculate_digest(file) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/mls/cli/documents.rb', line 11

# Computes the MD5, SHA-1 and SHA-256 hex digests of +file+ in a single pass.
#
# The stream is rewound first and then consumed in 16 KiB chunks, so the whole
# content is hashed regardless of the current read position and without
# loading it into memory at once.
#
# @param file [#rewind, #read] readable stream supporting +read(length, buffer)+
# @return [Hash{Symbol => String}] hex digests under +:md5+, +:sha1+, +:sha256+
def self.calculate_digest(file)
  hashers = {
    md5: Digest::MD5.new,
    sha1: Digest::SHA1.new,
    sha256: Digest::SHA256.new
  }

  chunk = ""
  file.rewind
  # read(length, buffer) refills the same buffer and returns nil at EOF.
  while file.read(16384, chunk)
    hashers.each_value { |hasher| hasher.update(chunk) }
  end

  hashers.map { |name, hasher| [name, hasher.hexdigest] }.to_h
end

.partition(value, depth: 5) ⇒ Object



6
7
8
9
# File 'lib/mls/cli/documents.rb', line 6

# Turns +value+ into a relative path by cutting it into 4-character chunks.
#
# The first +depth+ chunks are joined with "/"; every remaining chunk is
# appended directly to the last of them, with no separator in between.
#
#   partition("abcdefghijkl", depth: 2) # => "abcd/efghijkl"
#
# @param value [String] string to split (e.g. a hex digest)
# @param depth [Integer] number of leading chunks joined with "/"
# @return [String] the partitioned path
def self.partition(value, depth: 5)
  chunks = value.scan(/.{1,4}/)
  leading = chunks.take(depth)
  trailing = chunks.drop(depth)
  leading.join("/") + trailing.join("")
end