Module: MiGA::Cli::Action::Download::Ncbi
- Includes:
- Base
- Included in:
- NcbiGet
- Defined in:
- lib/miga/cli/action/download/ncbi.rb
Overview
Helper module including download functions for the ncbi_get action
Instance Method Summary
collapse
Methods included from Base
#cli_base_flags, #cli_filters, #cli_save_actions, #discard_excluded, #download_entries, #exclude_newer, #finalize_tasks, #generic_perform, #impose_limit, #load_ncbi_taxonomy_dump, #load_tasks, #save_entry, #unlink_entries
Instance Method Details
#cli_name_modifiers(opt) ⇒ Object
31
32
33
34
35
36
37
38
39
40
41
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 31
# Registers the command-line options that alter how dataset names are built.
#
# @param opt [OptionParser] parser receiving the option definitions
# @return [Object] whatever the last +opt.on+ call returns
def cli_name_modifiers(opt)
  version_help = [
    'Do not add sequence version to the dataset name',
    'Only affects --complete and --chromosome'
  ]
  # The value yielded by the parser for this switch is stored verbatim
  opt.on('--no-version-name', *version_help) do |flag|
    cli[:add_version] = flag
  end

  # Hidden flag kept only so that old invocations do not fail to parse
  opt.on('--legacy-name', '::HIDE::') do
    warn 'Deprecated flag --legacy-name ignored'
  end
end
|
#cli_task_flags(opt) ⇒ Object
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 11
# Registers the command-line options that select which genomes to download.
#
# One boolean flag is declared through +cli.opt_flag+ for each of
# reference, complete, chromosome, scaffold and contig. +--all+ switches on
# the four assembly-level flags at once (it does not touch +:reference+).
#
# @param opt [OptionParser] parser receiving the option definitions
# @return [Object] whatever the last +opt.on+ call returns
def cli_task_flags(opt)
  cli.opt_flag(
    opt, 'reference',
    'Download all reference genomes (ignore any other status)'
  )
  {
    'complete' => 'Download complete genomes',
    'chromosome' => 'Download complete chromosomes',
    'scaffold' => 'Download genomes in scaffolds',
    'contig' => 'Download genomes in contigs'
  }.each { |flag, help| cli.opt_flag(opt, flag, help) }

  opt.on('--all', 'Download all genomes (in any status)') do
    %i[complete chromosome scaffold contig].each do |level|
      cli[level] = true
    end
  end

  # Hidden option: path stored as-is under :ncbi_list_json
  opt.on('--ncbi-list-json STRING', '::HIDE::') do |path|
    cli[:ncbi_list_json] = path
  end
end
|
#parse_reports_as_datasets(reports) ⇒ Object
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 112
# Converts a collection of assembly reports into a hash of dataset
# descriptions indexed by dataset name.
#
# Reports whose +:accession+ is nil, empty or '-' are skipped. The name of
# each dataset comes from +remote_report_name+. When the report carries an
# +assembly_info.release_date+, it is parsed with +Time.parse+ and stored as
# a string. The full report is kept under +md[:ncbi_dataset]+.
#
# @param reports [Array<Hash>] reports with symbol keys
# @return [Hash{String => Hash}] dataset descriptions by name
def parse_reports_as_datasets(reports)
  reports.each_with_object({}) do |report, datasets|
    accession = report[:accession]
    next if accession.nil? || accession.empty? || accession == '-'

    name = remote_report_name(report, accession)
    metadata = {
      type: :genome,
      ncbi_asm: accession,
      strain: report.dig(:organism, :infraspecific_names, :strain)
    }
    released = report.dig(:assembly_info, :release_date)
    metadata[:release_date] = Time.parse(released).to_s if released
    metadata[:ncbi_dataset] = report

    datasets[name] = {
      ids: [accession], db: :assembly, universe: :ncbi, md: metadata
    }
  end
end
|
#read_ncbi_list_json(file) ⇒ Object
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 83
# Reads a previously saved remote list from +file+.
#
# The expected layout is the one produced by +write_ncbi_list_json+: a
# header line "# <count>" followed by one line per dataset holding the name,
# a tab, and the JSON-encoded description.
#
# An empty file yields an empty list, and blank lines are skipped. Progress
# is reported as the 1-based number of entries read, matching the writer.
#
# @param file [String] path to the saved list
# @return [Hash{String => Object}] parsed descriptions indexed by name
def read_ncbi_list_json(file)
  cli.say "Reusing remote list: #{file}"
  list = {}
  File.open(file, 'r') do |fh|
    # +gets+ returns nil on an empty file; +to_s+ turns that into a count of 0
    n_tot = fh.gets.to_s.chomp.sub(/^# /, '').to_i
    n_read = 0
    fh.each_line do |ln|
      name, json = ln.chomp.split("\t", 2)
      next if name.nil? # blank line, e.g. trailing newline at end of file

      list[name] = MiGA::Json.parse(json, contents: true)
      cli.advance('Lines:', n_read += 1, n_tot)
    end
    cli.say
  end
  list
end
|
#remote_list ⇒ Object
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 53
# Returns the list of remote datasets to consider, indexed by dataset name.
#
# When +cli[:ncbi_list_json]+ points to a non-empty file, the list is read
# from it through +read_ncbi_list_json+ and nothing is downloaded.
# Otherwise the list is fetched page by page, following +:next_page_token+
# until a page comes back empty or without a token. If
# +cli[:ncbi_list_json]+ is set, the result is then saved to that path.
#
# @return [Hash] dataset descriptions indexed by name
def remote_list
  cache = cli[:ncbi_list_json]
  return read_ncbi_list_json(cache) if cache && File.size?(cache)

  cli.say 'Obtaining remote list of datasets'
  datasets = {}
  query = remote_list_query
  loop do
    raw = MiGA::RemoteDataset.download(:ncbi_datasets, :genome, query, :json)
    page = MiGA::Json.parse(raw, contents: true)

    # Stop on a missing/empty page or a page without reports
    reports = page[:reports] if page&.any?
    break unless reports&.any?

    datasets.merge!(parse_reports_as_datasets(reports))
    cli.advance('Datasets:', datasets.size, page[:total_count])

    token = page[:next_page_token]
    break unless token

    # The same query object is reused, only the page token changes
    query[:page_token] = token
  end
  cli.say
  write_ncbi_list_json(cache, datasets) if cache
  datasets
end
|
#remote_list_query ⇒ Object
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 142
# Builds the query hash used to request the remote list of genomes.
#
# The query always carries the requested taxon and a +:filters+ hash. With
# +cli[:reference]+ only the reference filter is set; otherwise the
# assembly levels enabled in the CLI are listed, each wrapped in literal
# double quotes.
#
# NOTE(review): +:assembly_level+ is stored at the top level of the query
# while +:reference_only+ goes under +:filters+. The consumer of this hash
# is not visible here; confirm this asymmetry is intended.
#
# @return [Hash] query with +:taxons+, +:filters+ and, unless reference
#   genomes were requested, +:assembly_level+
def remote_list_query
  query = { taxons: [cli[:taxon]], filters: {} }
  if cli[:reference]
    query[:filters][:reference_only] = true
    return query
  end

  levels = {
    contig: 'contig',
    scaffold: 'scaffold',
    chromosome: 'chromosome',
    complete: 'complete_genome'
  }
  enabled = levels.select { |flag, _level| cli[flag] }.values
  query[:assembly_level] = enabled.map { |level| "\"#{level}\"" }
  query
end
|
#remote_report_name(r, asm) ⇒ Object
134
135
136
137
138
139
140
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 134
# Builds the dataset name for an assembly report.
#
# The trailing ".<digits>" version of the accession is dropped unless
# +cli[:add_version]+ is set. When the report has an organism name, it is
# prefixed to the accession with an underscore. The result is passed
# through +String#miga_name+.
#
# @param r [Hash] assembly report with symbol keys
# @param asm [String] assembly accession
# @return [String] dataset name
def remote_report_name(r, asm)
  accession = asm.to_s
  # Always work on a new string so the caller's accession is left untouched
  accession =
    if cli[:add_version]
      accession.dup
    else
      accession.gsub(/\.\d+\Z/, '')
    end

  organism = r.dig(:organism, :organism_name)
  label = organism ? "#{organism}_#{accession}" : accession
  label.miga_name
end
|
#sanitize_cli ⇒ Object
43
44
45
46
47
48
49
50
51
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 43
# Validates the parsed command-line options before running the action.
#
# Calls +cli.ensure_par+ for the taxon parameter (-T), then checks that at
# least one of reference, complete, chromosome, scaffold or contig was
# requested. With +cli[:dry]+ set, +cli[:save_every]+ is forced to 1.
#
# @raise [RuntimeError] if no type of genome was requested
def sanitize_cli
  cli.ensure_par(taxon: '-T')

  requested = %i[reference complete chromosome scaffold contig].any? do |task|
    cli[task]
  end
  raise 'No action requested: pick at least one type of genome' unless requested

  cli[:save_every] = 1 if cli[:dry]
end
|
#write_ncbi_list_json(file, list) ⇒ Object
99
100
101
102
103
104
105
106
107
108
109
110
|
# File 'lib/miga/cli/action/download/ncbi.rb', line 99
# Saves the remote list to +file+ so it can be reused later.
#
# The file starts with a header line "# <count>" and then holds one line
# per dataset: the name, a tab, and the description serialized with
# +MiGA::Json.generate_fast+. Progress is reported after each entry.
#
# @param file [String] path of the file to (over)write
# @param list [Hash] dataset descriptions indexed by name
def write_ncbi_list_json(file, list)
  cli.say "Saving remote list: #{file}"
  total = list.size
  File.open(file, 'w') do |out|
    out.puts("# #{total}")
    list.each_with_index do |(name, dataset), idx|
      out.puts("#{name}\t#{MiGA::Json.generate_fast(dataset)}")
      cli.advance('Datasets:', idx + 1, total)
    end
    cli.say
  end
end
|