Module: MiGA::Cli::Action::Download::Base
Overview
Helper module including download functions for the *_get actions
Instance Method Summary
- #cli_filters(opt) ⇒ Object
- #cli_save_actions(opt) ⇒ Object
- #discard_excluded(ds) ⇒ Object
- #download_entries(ds, p) ⇒ Object
- #finalize_tasks(d, downloaded) ⇒ Object
- #generic_perform ⇒ Object
- #impose_limit(ds) ⇒ Object
- #load_tasks ⇒ Object
- #save_entry(name, body, p) ⇒ Object
  Saves the (generic remote) entry identified by name with body into the project p, and returns true on success and false otherwise.
- #unlink_entries(p, unlink) ⇒ Object
Instance Method Details
#cli_filters(opt) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/miga/cli/action/download/base.rb', line 10

##
# Registers on the option parser +opt+ the options that filter which remote
# datasets are processed. Each option stores its value in +cli+ under the
# keys +:exclude+, +:dry+, +:ignore_until+, +:ignore_removed+ and +:get_md+.
# Options are declared in a fixed order because that order is what the
# parser sees. Returns whatever the final +cli.opt_flag+ call returns.
def cli_filters(opt)
  opt.on('--exclude PATH', 'A file with dataset names to exclude') do |path|
    cli[:exclude] = path
  end
  cli.opt_flag(opt, 'dry', 'Do not download or save the datasets')
  opt.on(
    '--ignore-until STRING',
    'Ignores all datasets until a name is found (useful for large reruns)'
  ) do |dataset_name|
    cli[:ignore_until] = dataset_name
  end
  opt.on(
    '--ignore-removed',
    'Ignores entries removed from NCBI (by default fails on removed entries)'
  ) do |flag|
    cli[:ignore_removed] = flag
  end
  cli.opt_flag(
    opt,
    'get-metadata',
    'Only download and update metadata for existing datasets',
    :get_md
  )
end
#cli_save_actions(opt) ⇒ Object
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/miga/cli/action/download/base.rb', line 30

##
# Registers on the option parser +opt+ the options that control how the
# downloaded datasets are saved. Values are stored in +cli+ under the keys
# +:only_md+, +:save_every+, +:query+, +:unlink+ and +:remote_list+.
# The help text of +--save-every+ embeds the value of +cli[:save_every]+ at
# the time this method runs. Returns whatever the last +opt.on+ returns.
def cli_save_actions(opt)
  cli.opt_flag(
    opt,
    'only-metadata',
    'Create datasets without input data but retrieve all metadata',
    :only_md
  )
  opt.on(
    '--save-every INT', Integer,
    'Save project every this many downloaded datasets',
    'If zero, it saves the project only once upon completion',
    "By default: #{cli[:save_every]}"
  ) do |count|
    cli[:save_every] = count
  end
  opt.on(
    '-q', '--query',
    'Register the datasets as queries, not reference datasets'
  ) do |flag|
    cli[:query] = flag
  end
  opt.on(
    '-u', '--unlink',
    'Unlink all datasets in the project missing from the download list'
  ) do |flag|
    cli[:unlink] = flag
  end
  opt.on(
    '-R', '--remote-list PATH',
    'Path to an output file with the list of all datasets listed remotely'
  ) do |path|
    cli[:remote_list] = path
  end
end
#discard_excluded(ds) ⇒ Object
90 91 92 93 94 95 96 97 98 99 |
# File 'lib/miga/cli/action/download/base.rb', line 90

##
# Removes from +ds+ (in place) every key listed in the file at
# +cli[:exclude]+, one name per line. Lines starting with +#+ are treated as
# comments and skipped. Does nothing when +cli[:exclude]+ is +nil+.
# Always returns +ds+ itself.
def discard_excluded(ds)
  return ds if cli[:exclude].nil?

  cli.say "Discarding datasets in #{cli[:exclude]}"
  # Stream the file line by line instead of loading it and building
  # intermediate arrays; exclusion lists can be long.
  File.foreach(cli[:exclude]) do |line|
    next if line.start_with?('#')

    ds.delete(line.chomp)
  end
  ds
end
#download_entries(ds, p) ⇒ Object
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
# File 'lib/miga/cli/action/download/base.rb', line 111

##
# Iterates over the +name+ => +body+ pairs in +ds+ and saves each selected
# entry into the project +p+ through +save_entry+.
#
# An entry is skipped (but still listed) when it comes before
# +cli[:ignore_until]+, or when its presence in the project does not match
# the mode: with +cli[:get_md]+ only datasets already in the project are
# processed, otherwise only datasets missing from it. With +cli[:dry]+
# entries are counted but +save_entry+ is never called.
#
# Unless +cli[:save_every]+ is 1, +p.do_not_save+ is set during the loop,
# +p.save!+ is called every +cli[:save_every]+ successful entries (when
# greater than 1) and once more at the end.
#
# Returns +[d, downloaded]+: the names listed (minus those for which
# +save_entry+ returned false) and the number of entries processed.
def download_entries(ds, p)
  cli.say "Downloading #{ds.size} " + (ds.size == 1 ? 'entry' : 'entries')

  # Normalise once: an unset save interval (nil) behaves like zero, and an
  # unset :get_md (nil) behaves like false. Comparing `nil? == nil` is
  # never true, which would make existing datasets be processed again.
  save_every = cli[:save_every].to_i
  only_existing = cli[:get_md] ? true : false

  p.do_not_save = true if save_every != 1
  ignore = !cli[:ignore_until].nil?
  downloaded = 0
  d = []
  ds.each do |name, body|
    d << name
    cli.puts name
    ignore = false if ignore && name == cli[:ignore_until]
    next if ignore || p.dataset(name).nil? == only_existing

    downloaded += 1
    next if cli[:dry]

    unless save_entry(name, body, p)
      # Entry could not be saved: take it back out of both tallies
      downloaded -= 1
      d.pop
      next
    end
    p.save! if save_every > 1 && (downloaded % save_every).zero?
  end
  p.do_not_save = false
  p.save! if save_every != 1
  [d, downloaded]
end
#finalize_tasks(d, downloaded) ⇒ Object
74 75 76 77 78 79 80 81 82 83 |
# File 'lib/miga/cli/action/download/base.rb', line 74

##
# Reports through +cli.say+ how many datasets were listed (+d.size+) and how
# many were downloaded (+downloaded+), or would be with +cli[:dry]+. When
# +cli[:remote_list]+ is set, writes every element of +d+ to that path, one
# per line, overwriting the file.
def finalize_tasks(d, downloaded)
  cli.say "Datasets listed: #{d.size}"
  verb = cli[:dry] ? 'to download' : 'downloaded'
  cli.say "Datasets #{verb}: #{downloaded}"
  return if cli[:remote_list].nil?

  File.open(cli[:remote_list], 'w') do |out|
    d.each { |dataset_name| out.puts dataset_name }
  end
end
#generic_perform ⇒ Object
56 57 58 59 60 61 62 63 |
# File 'lib/miga/cli/action/download/base.rb', line 56

##
# Runs the whole download workflow: loads the project and the dataset list
# (+load_tasks+), downloads the entries (+download_entries+), reports the
# outcome (+finalize_tasks+) and, when +cli[:unlink]+ is set, unlinks every
# project dataset whose name is not in the list of processed names.
def generic_perform
  project, entries = load_tasks
  listed, count = download_entries(entries, project)

  # Finalize
  finalize_tasks(listed, count)
  return unless cli[:unlink]

  unlink_entries(project, project.dataset_names - listed)
end
#impose_limit(ds) ⇒ Object
101 102 103 104 105 106 107 108 109 |
# File 'lib/miga/cli/action/download/base.rb', line 101

##
# Randomly subsamples +ds+ (in place) down to +cli[:max_datasets]+ entries
# when that limit is a positive integer smaller than +ds.size+. The relative
# order of the retained entries is kept. A missing, zero or negative limit
# leaves +ds+ untouched. Always returns +ds+ itself.
def impose_limit(ds)
  max = cli[:max_datasets].to_i
  # A negative limit is treated like "no limit" rather than being passed to
  # Array#sample, which raises on negative sizes.
  return ds unless max.positive? && max < ds.size

  cli.say "Subsampling list from #{ds.size} to #{max} datasets"
  # Hash lookup keeps the filter linear in ds.size; scanning the sampled
  # array for every key would be O(ds.size * max).
  keep = ds.keys.sample(max).each_with_object({}) { |key, h| h[key] = true }
  ds.select! { |key, _| keep.key?(key) }
  ds
end
#load_tasks ⇒ Object
65 66 67 68 69 70 71 72 |
# File 'lib/miga/cli/action/download/base.rb', line 65

##
# Prepares a download run. Calls +sanitize_cli+, loads the project with
# +cli.load_project+, then obtains the dataset list from +remote_list+ and
# passes it through +discard_excluded+ and +impose_limit+, in that order.
# Returns the pair +[project, datasets]+.
def load_tasks
  sanitize_cli
  project = cli.load_project
  datasets = impose_limit(discard_excluded(remote_list))
  [project, datasets]
end
#save_entry(name, body, p) ⇒ Object
Saves the (generic remote) entry identified by name with body into the project p, and returns true on success and false otherwise.
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/miga/cli/action/download/base.rb', line 141

##
# Saves the (generic remote) entry identified by +name+ with +body+ into the
# project +p+, and returns +true+ on success and +false+ otherwise.
#
# +body+ is read at the keys +:ids+, +:db+, +:universe+ and +:md+; with
# +cli[:only_md]+ the flag +:metadata_only+ is set in +body[:md]+ (mutating
# the caller's hash). With +cli[:get_md]+ the existing dataset's metadata is
# updated; otherwise the dataset is created and registered in +p+, as a
# reference dataset unless +cli[:query]+ is set.
#
# Raises MiGA::RemoteDataMissingError unless +cli[:ignore_removed]+ is set,
# in which case the error is reported and +false+ is returned.
def save_entry(name, body, p)
  cli.say " Locating remote dataset: #{name}"
  body[:md][:metadata_only] = true if cli[:only_md]
  rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
  if cli[:get_md]
    cli.say ' Updating dataset'
    # NOTE(review): the listing showed `rd.(...)`, i.e. the method name was
    # lost; restored as +update_metadata+ - confirm against upstream.
    rd.update_metadata(p.dataset(name), body[:md])
  else
    cli.say ' Creating dataset'
    rd.save_to(p, name, !cli[:query], body[:md])
    # NOTE(review): the listing showed `cli.(...)`; restored as
    # +add_metadata+ - confirm against upstream.
    cli.add_metadata(p.add_dataset(name))
  end
  true
rescue MiGA::RemoteDataMissingError
  # Re-raise the original error untouched unless removals are tolerated
  raise unless cli[:ignore_removed]

  cli.say " Removed dataset ignored: #{name}"
  false
end
#unlink_entries(p, unlink) ⇒ Object
85 86 87 88 |
# File 'lib/miga/cli/action/download/base.rb', line 85

##
# For every name in +unlink+, calls +p.unlink_dataset+ and then +remove!+ on
# its result, and finally reports how many names were processed.
def unlink_entries(p, unlink)
  unlink.each do |dataset_name|
    unlinked = p.unlink_dataset(dataset_name)
    unlinked.remove!
  end
  cli.say "Datasets unlinked: #{unlink.size}"
end