Class: Figshare::Upload

Inherits:
PrivateArticles show all
Defined in:
lib/upload.rb

Overview

Upload files to figshare. N.B. An upload can sometimes fail, so you need to check the MD5 to ensure the file arrived intact.

It can take a short while for the MD5 to be calculated, so upload, wait, then check for a computed_md5.
The status will show as "ic_checking", then "moving_to_final", then "available".
I have seen it stuck at "moving_to_final", but with the correct computed_md5.

Constant Summary collapse

CHUNK_SIZE =
1048576

Instance Attribute Summary collapse

Attributes inherited from Base

#api_url, #article_index_file, #auth_token, #base_dir, #hostname, #institute_id

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from PrivateArticles

#article_delete, #author_delete, #authors, #authors_add, #authors_replace, #categories, #categories_add, #categories_delete, #categories_replace, #create, #delete_all_files, #detail, #embargo_delete, #embargo_detail, #embargo_update, #file_delete, #file_detail, #files, #link_create, #link_delete, #link_update, #links, #list, #publish, #reserve_doi, #reserve_handle, #search, #update

Methods inherited from Base

#initialize

Constructor Details

This class inherits a constructor from Figshare::Base

Instance Attribute Details

#article_idObject

Returns the value of attribute article_id.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @article_id, which #upload assigns from its article_id: argument.
#
# @return [Object, nil] the stored article id; nil if nothing has assigned it yet
def article_id
  @article_id
end

#bad_countObject

Returns the value of attribute bad_count.



14
15
16
# File 'lib/upload.rb', line 14

# Reader for @bad_count. #upload_dir zeroes it on entry and adds one for each
# local file whose MD5 differs from the MD5 cached for that file name.
#
# @return [Integer, nil] the mismatch count; nil if #upload_dir has never run
def bad_count
  @bad_count
end

#file_idObject

Returns the value of attribute file_id.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @file_id. #upload resets it to nil before starting a new upload,
# and #status passes it to file_detail as file_id:.
# NOTE(review): presumably assigned by initiate_new_upload, which is not
# visible here — confirm.
#
# @return [Object, nil] the stored figshare file id
def file_id
  @file_id
end

#file_infoObject

Returns the value of attribute file_info.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @file_info. #status stores the record yielded by file_detail
# here; #upload resets it to nil before starting a new upload.
#
# @return [Object, nil] the most recently stored file record
def file_info
  @file_info
end

#file_nameObject

Returns the value of attribute file_name.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @file_name, which #upload assigns from its file_name: argument.
#
# @return [Object, nil] the stored file name; nil if nothing has assigned it yet
def file_name
  @file_name
end

#new_countObject

Returns the value of attribute new_count.



14
15
16
# File 'lib/upload.rb', line 14

# Reader for @new_count. #upload_dir zeroes it on entry and adds one after
# every #upload call it makes (both brand-new files and re-uploads).
#
# @return [Integer, nil] the upload count; nil if #upload_dir has never run
def new_count
  @new_count
end

#upload_hostObject

Returns the value of attribute upload_host.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @upload_host. #upload resets it to nil before starting a new
# upload.
# NOTE(review): where it is populated is not visible here — presumably one of
# the upload helper methods; confirm.
#
# @return [Object, nil] the stored upload host
def upload_host
  @upload_host
end

#upload_parts_detailObject

Returns the value of attribute upload_parts_detail.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @upload_parts_detail. #upload resets it to nil before starting a
# new upload and prints it (when trace > 1) right after calling
# get_upload_parts_details.
# NOTE(review): presumably assigned by get_upload_parts_details, which is not
# visible here — confirm.
#
# @return [Object, nil] the stored upload parts detail
def upload_parts_detail
  @upload_parts_detail
end

#upload_queryObject

Returns the value of attribute upload_query.



13
14
15
# File 'lib/upload.rb', line 13

# Reader for @upload_query. #upload resets it to nil before starting a new
# upload.
# NOTE(review): where it is populated is not visible here — presumably one of
# the upload helper methods; confirm.
#
# @return [Object, nil] the stored upload query
def upload_query
  @upload_query
end

Class Method Details

.get_file_check_data(filename) ⇒ String, Integer

Calculate a local file's MD5 and size.



20
21
22
23
24
25
26
27
28
29
# File 'lib/upload.rb', line 20

def self.get_file_check_data(filename)
  stat_record =  File.stat(filename)
  md5 = Digest::MD5.new
  File.open(filename, 'rb') do |fd|
    while(buffer = fd.read(CHUNK_SIZE)) 
      md5.update(buffer)
    end
  end
  return md5.hexdigest, stat_record.size
end

Instance Method Details

#statusHash

Get status of the current upload. Just fetches the file record from figshare. Of interest is the status field, and the computed_md5 field



132
133
134
135
136
137
138
# File 'lib/upload.rb', line 132

# Fetch the figshare file record for the current upload.
#
# Clears @file_info, calls file_detail with @article_id and @file_id, and
# stores whatever record file_detail yields back into @file_info. The fields
# of interest in that record are the status and the computed_md5.
#
# @return [Hash] the fetched file record (the same object kept in @file_info)
# @raise [RuntimeError] if file_detail did not yield a record
def status
  @file_info = nil
  file_detail(article_id: @article_id, file_id: @file_id) do |record|
    @file_info = record
  end
  raise "Upload::status(): Failed to get figshare file record" if @file_info.nil?

  # Return the record explicitly: a trailing modifier-raise evaluates to nil
  # on success, so without this line callers would always receive nil.
  @file_info
end

#upload(article_id:, file_name:, trace: 0) ⇒ Object

Upload the file to the Figshare article.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/upload.rb', line 36

# Upload a single local file into a figshare article.
#
# Records the arguments in instance state, clears everything left over from
# any earlier upload, then runs the upload steps in order:
# initiate_new_upload, get_file_info, get_upload_parts_details,
# upload_the_parts and complete_upload. When trace is greater than 1 the
# intermediate state is printed after each of the first three steps, and the
# final file record is fetched with #status and printed.
#
# @param article_id [Object] id of the figshare article receiving the file
# @param file_name [Object] the local file to upload
# @param trace [Integer] values above 1 enable the diagnostic output
# @return [nil]
def upload(article_id:, file_name:, trace: 0)
  # Start from a clean slate so nothing leaks in from a previous upload.
  @file_id = @file_info = @upload_query = @upload_host = @upload_parts_detail = nil

  @trace = trace
  @file_name = file_name
  @article_id = article_id

  initiate_new_upload
  if @trace > 1
    puts "New File_id: #{@file_id}\n\n"
  end

  get_file_info
  if @trace > 1
    puts "@file_info: #{@file_info.to_j}\n\n"
  end

  get_upload_parts_details
  if @trace > 1
    puts "@upload_parts_detail: #{@upload_parts_detail.to_j}\n\n"
  end

  upload_the_parts
  complete_upload

  # The final status is only fetched when it is going to be printed.
  return unless @trace > 1

  status
  puts "Final Status: #{@file_info.to_j}\n\n"
end

#upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0) ⇒ Object

Upload all files in a directory into one article. Checks checksums, and only uploads changed or new files. Does not recurse through sub-directories, as figshare has a flat file structure.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/upload.rb', line 73

# Upload every file in one directory (no recursion) into a single article,
# skipping files whose MD5 already matches the cached figshare MD5.
#
# For each local file reported by DirR.walk_dir:
# * no entry in @md5_cache for that file name: the file is uploaded;
# * entry present but the MD5 differs: the figshare copy is deleted and the
#   file is uploaded again (counted in both @bad_count and @new_count);
# * entry present and the MD5 matches: nothing is done.
# Afterwards, every cached file name that was not seen locally is either
# deleted from the article (delete_extras) or just reported (trace > 0).
#
# @md5_cache is read as file name => { id:, md5: }.
# NOTE(review): presumably populated by cache_article_file_md5, which is not
# visible here — confirm.
#
# @param article_id [Object] id of the figshare article to sync into
# @param directory [String] local directory whose files are uploaded
# @param delete_extras [Boolean] delete article files that have no local file
# @param exclude_dot_files [Boolean] skip file names that start with "."
# @param trace [Integer] above 0 logs each action; above 1 also logs unchanged
#   files. The value is passed on to #upload.
# @return [Object] the result of iterating @md5_cache
def upload_dir(article_id:, directory:, delete_extras: false, exclude_dot_files: true, trace: 0)
  @new_count = 0
  @bad_count = 0

  seen = {} # local file names encountered during the directory walk
  cache_article_file_md5(article_id: article_id)

  DirR.walk_dir(directory: directory, walk_sub_directories: false) do |dir, name|
    next if exclude_dot_files && name.start_with?('.')

    seen[name] = true
    path = "#{dir}/#{name}"
    cached = @md5_cache[name]

    if cached
      # Only the digest is needed here; the size is discarded.
      md5, _size = Upload.get_file_check_data(path)
      if cached[:md5] != md5
        # The file is on figshare but has changed, or a previous upload failed.
        puts "Deleting: #{article_id} << #{path} #{cached[:id]} MISMATCH '#{cached[:md5]}' != '#{md5}'" if trace > 0
        file_delete(article_id: article_id, file_id: cached[:id])
        @bad_count += 1
        puts "Re-ADDING: #{article_id} << #{path}" if trace > 0
        upload(article_id: article_id, file_name: path, trace: trace)
        @new_count += 1
      elsif trace > 1
        puts "EXISTS: #{article_id} #{path}"
      end
    else
      puts "ADDING: #{article_id} << #{path}" if trace > 0
      upload(article_id: article_id, file_name: path, trace: trace)
      @new_count += 1
    end
  end

  # Handle files that exist in the figshare article but not in the directory.
  @md5_cache.each do |remote_name, entry|
    next if seen[remote_name]

    if delete_extras
      puts "Deleteing EXTRA: #{article_id} << #{remote_name} #{entry[:id]}" if trace > 0
      # Use this iteration's entry: the walk block's variables are out of
      # scope here, so referring to them raises NameError.
      file_delete(article_id: article_id, file_id: entry[:id])
    elsif trace > 0
      puts "EXTRA: #{article_id} << #{remote_name} #{entry[:id]}"
    end
  end
end