Class: MiGA::Dataset

Inherits:
MiGA
  • Object
show all
Includes:
Common::WithOption, Hooks, Result, Status, Type
Defined in:
lib/miga/dataset.rb,
lib/miga/dataset/base.rb

Overview

Dataset representation in MiGA

Defined Under Namespace

Modules: Base, Hooks, Result, Status, Type

Constant Summary

Constants included from MiGA

CITATION, VERSION, VERSION_DATE, VERSION_NAME

Instance Attribute Summary collapse

Attributes included from Common::Net

#remote_connection_uri

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Hooks

#default_hooks, #hook__pull_result_hooks, #hook_check_type, #hook_clear_run_counts, #hook_recalculate_status, #hook_run_cmd

Methods included from Common::Hooks

#add_hook, #default_hooks, #hook_run_lambda, #hooks, #pull_hook

Methods included from Type

#check_type, #markers?, #multi?, #nonmulti?, #type

Methods included from Status

#recalculate_status, #status

Methods included from Result

#cleanup_distances!, #done_preprocessing?, #first_preprocessing, #next_preprocessing, #profile_advance, #result_base, #result_status, #results_status

Methods included from Result::Add

#add_result_assembly, #add_result_cds, #add_result_distances, #add_result_essential_genes, #add_result_mytaxa, #add_result_mytaxa_scan, #add_result_raw_reads, #add_result_read_quality, #add_result_ssu, #add_result_stats, #add_result_taxonomy, #add_result_trimmed_fasta, #add_result_trimmed_reads

Methods included from Result::Ignore

#force_task?, #ignore_by_type?, #ignore_complete?, #ignore_empty?, #ignore_force?, #ignore_inactive?, #ignore_multi?, #ignore_nomarkers?, #ignore_nonmulti?, #ignore_noref?, #ignore_project?, #ignore_reasons, #ignore_task?, #ignore_upstream?, #why_ignore

Methods included from Common::WithResult

#add_result, #each_result, #get_result, #next_task, #recalculate_tasks, #result, #result_dirs, #results

Methods included from Common::WithOption

#all_options, #assert_has_option, #assert_valid_option_value, #option, #option?, #option_by_default, #option_by_metadata, #option_from_string, #set_option

Methods inherited from MiGA

CITATION, CITATION_ARRAY, DEBUG, DEBUG_OFF, DEBUG_ON, DEBUG_TRACE_OFF, DEBUG_TRACE_ON, FULL_VERSION, LONG_VERSION, VERSION, VERSION_DATE, #advance, debug?, debug_trace?, initialized?, #like_io?, #num_suffix, rc_path, #result_files_exist?, #say

Methods included from Common::Path

#root_path, #script_path

Methods included from Common::Format

#clean_fasta_file, #seqs_length, #tabulate

Methods included from Common::Net

#download_file_ftp, #http_request, #known_hosts, #main_server, #net_method, #normalize_encoding, #remote_connection

Methods included from Common::SystemCall

#run_cmd, #run_cmd_opts

Constructor Details

#initialize(project, name, is_ref = true, metadata = {}) ⇒ Dataset

Create a MiGA::Dataset object in a project MiGA::Project with a uniquely identifying name. is_ref indicates if the dataset is to be treated as reference (true, default) or query (false). Pass any additional metadata as a Hash.



56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/miga/dataset.rb', line 56

# Create a dataset called +name+ inside +project+.
#
# +is_ref+ is stored under the :ref metadata key. +metadata+ is a Hash of
# additional metadata; it is modified in place (:ref is overwritten, and
# :type / :status receive defaults when they are unset).
#
# Raises a RuntimeError when +name.to_s+ does not satisfy +miga_name?+.
def initialize(project, name, is_ref = true, metadata = {})
  name = name.to_s
  unless name.miga_name?
    raise 'Invalid name, please use only alphanumerics and underscores: ' \
          "#{name}"
  end

  # @metadata starts as nil; the file path and initial values are kept in
  # @metadata_future for later use
  @project, @name, @metadata = project, name, nil
  metadata[:ref] = is_ref
  metadata[:type] ||= :empty
  metadata[:status] ||= 'incomplete'
  @metadata_future = [
    File.join(project.path, 'metadata', "#{name}.json"),
    metadata
  ]
  # Nothing else to do when the metadata file is already on disk
  return if File.exist? @metadata_future[0]

  save
  pull_hook :on_create
end

Instance Attribute Details

#name ⇒ Object (readonly)

Datasets are uniquely identified by name in a project



49
50
51
# File 'lib/miga/dataset.rb', line 49

# Name of the dataset; datasets are uniquely identified by name in a project.
# Returns the value of @name.
def name
  @name
end

#project ⇒ Object (readonly)

MiGA::Project that contains the dataset



45
46
47
# File 'lib/miga/dataset.rb', line 45

# MiGA::Project that contains the dataset. Returns the value of @project.
def project
  @project
end

Class Method Details

.EXCLUDE_NOMARKER_TASKS ⇒ Object

Tasks to be excluded from datasets without markers



37
38
39
# File 'lib/miga/dataset/base.rb', line 37

# Tasks to be excluded from datasets without markers.
# Returns the @@EXCLUDE_NOMARKER_TASKS class variable.
def EXCLUDE_NOMARKER_TASKS
  @@EXCLUDE_NOMARKER_TASKS
end

.EXCLUDE_NOREF_TASKS ⇒ Object

Tasks to be excluded from query datasets



31
32
33
# File 'lib/miga/dataset/base.rb', line 31

# Tasks to be excluded from query datasets.
# Returns the @@EXCLUDE_NOREF_TASKS class variable.
def EXCLUDE_NOREF_TASKS
  @@EXCLUDE_NOREF_TASKS
end

.exist?(project, name) ⇒ Boolean

Does the project already have a dataset with that name?

Returns:

  • (Boolean)


30
31
32
# File 'lib/miga/dataset.rb', line 30

# Does +project+ already have a dataset called +name+?
# Checks membership of +name+ in +project.dataset_names_set+.
def exist?(project, name)
  known_names = project.dataset_names_set
  known_names.include?(name)
end

.INFO_FIELDS ⇒ Object

Standard fields of metadata for datasets



36
37
38
# File 'lib/miga/dataset.rb', line 36

# Standard fields of metadata for datasets, as a new Array of Strings on
# every call.
def INFO_FIELDS
  [
    'name', 'created', 'updated', 'type',
    'ref', 'user', 'description', 'comments'
  ]
end

.KNOWN_TYPES ⇒ Object

Supported dataset types



18
19
20
# File 'lib/miga/dataset/base.rb', line 18

# Supported dataset types. Returns the @@KNOWN_TYPES class variable.
def KNOWN_TYPES
  @@KNOWN_TYPES
end

.ONLY_MULTI_TASKS ⇒ Object

Tasks to be executed only in datasets that are multi-organism. These tasks are ignored for single-organism datasets or for unknown types



51
52
53
# File 'lib/miga/dataset/base.rb', line 51

# Tasks to be executed only in datasets that are multi-organism.
# Returns the @@ONLY_MULTI_TASKS class variable.
def ONLY_MULTI_TASKS
  @@ONLY_MULTI_TASKS
end

.ONLY_NONMULTI_TASKS ⇒ Object

Tasks to be executed only in datasets that are single-organism. These tasks are ignored for multi-organism datasets or for unknown types



44
45
46
# File 'lib/miga/dataset/base.rb', line 44

# Tasks to be executed only in datasets that are single-organism.
# Returns the @@ONLY_NONMULTI_TASKS class variable.
def ONLY_NONMULTI_TASKS
  @@ONLY_NONMULTI_TASKS
end

.OPTIONS ⇒ Object

Options supported by datasets



57
58
59
# File 'lib/miga/dataset/base.rb', line 57

# Options supported by datasets. Returns the @@OPTIONS class variable.
def OPTIONS
  @@OPTIONS
end

.PREPROCESSING_TASKS ⇒ Object

Returns an Array of tasks (Symbols) to be executed before project-wide tasks



25
26
27
# File 'lib/miga/dataset/base.rb', line 25

# Tasks (Symbols) to be executed before project-wide tasks.
# Returns the @@PREPROCESSING_TASKS class variable.
def PREPROCESSING_TASKS
  @@PREPROCESSING_TASKS
end

.RESULT_DIRS ⇒ Object

Directories containing the results from dataset-specific tasks



12
13
14
# File 'lib/miga/dataset/base.rb', line 12

# Directories containing the results from dataset-specific tasks.
# Returns the @@RESULT_DIRS class variable.
def RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#activate! ⇒ Object

Activate a dataset. This removes the :inactive flag



125
126
127
128
129
130
131
# File 'lib/miga/dataset.rb', line 125

# Activate a dataset. This removes the :inactive flag from the metadata and
# saves it.
#
# A :warn entry is cleared only when it starts with "Inactive: " (the
# prefix written by #inactivate!); any other warning is kept. For reference
# datasets the project tasks are recalculated. Finally the :on_activate
# hook is pulled.
def activate!
  metadata[:inactive] = nil
  metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
  metadata.save
  project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
  pull_hook :on_activate
end

#active? ⇒ Boolean Also known as: is_active?

Is this dataset active?

Returns:

  • (Boolean)


155
156
157
# File 'lib/miga/dataset.rb', line 155

# Is this dataset active? True when the :inactive metadata entry is nil or
# false, false for any other value.
def active?
  !metadata[:inactive]
end

#closest_relatives(how_many = 1, ref_project = false) ⇒ Object

Returns an Array of how_many duples (Arrays) sorted by AAI:

  • 0: A String with the name(s) of the reference dataset.

  • 1: A Float with the AAI.

This function is currently only supported for query datasets when ref_project is false (default), and only for reference datasets when ref_project is true. It returns nil if this analysis is not supported.



186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/miga/dataset.rb', line 186

# Query the AAI database of this dataset for its +how_many+ closest
# relatives, ordered by decreasing AAI.
#
# The database is taken from the :taxonomy result when +ref_project+ is
# true and from the :distances result otherwise. Returns nil when +ref?+
# differs from +ref_project+, when the dataset is +multi?+, or when the
# required result is missing; otherwise returns whatever MiGA::SQLite#run
# yields for the query.
def closest_relatives(how_many = 1, ref_project = false)
  supported = ref? == ref_project && !multi?
  return nil unless supported

  source_task = ref_project ? :taxonomy : :distances
  source = result(source_task)
  return nil if source.nil?

  # Loaded lazily, only once a database is actually going to be queried
  require 'miga/sqlite'
  query = 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
          'GROUP BY seq2 ORDER BY aai DESC LIMIT ?'
  database = MiGA::SQLite.new(source.file_path(:aai_db))
  database.run(query, [name, how_many])
end

#inactivate!(reason = nil) ⇒ Object

Inactivate a dataset. This halts automated processing by the daemon

If given, the reason string is saved as a metadata :warn entry



115
116
117
118
119
120
121
# File 'lib/miga/dataset.rb', line 115

# Inactivate a dataset by setting the :inactive metadata flag and saving.
#
# When +reason+ is not nil it is stored as the :warn metadata entry with
# the prefix "Inactive: ". For reference datasets the project tasks are
# recalculated. Finally the :on_inactivate hook is pulled.
def inactivate!(reason = nil)
  metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
  metadata[:inactive] = true
  metadata.save
  project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
  pull_hook :on_inactivate
end

#info ⇒ Object

Get standard metadata values for the dataset as Array



135
136
137
138
139
# File 'lib/miga/dataset.rb', line 135

# Get standard metadata values for the dataset as an Array, in the order
# given by MiGA::Dataset.INFO_FIELDS. The 'name' field comes from #name;
# every other field is looked up in the metadata.
def info
  MiGA::Dataset.INFO_FIELDS.map do |k|
    k == 'name' ? name : metadata[k]
  end
end

#metadata ⇒ Object

MiGA::Metadata with information about the dataset



78
79
80
81
82
83
84
# File 'lib/miga/dataset.rb', line 78

# MiGA::Metadata with information about the dataset.
#
# The object is built on first access from the arguments stored in
# @metadata_future, and the :on_load hook is pulled right after it is
# built. Later calls return the same object without pulling the hook.
def metadata
  return @metadata unless @metadata.nil?

  # Assign before pulling the hook so the object exists while the hook runs
  @metadata = MiGA::Metadata.new(*@metadata_future)
  pull_hook :on_load
  @metadata
end

#query? ⇒ Boolean Also known as: is_query?

Is this dataset a query (non-reference)?

Returns:

  • (Boolean)


149
150
151
# File 'lib/miga/dataset.rb', line 149

# Is this dataset a query (non-reference)? True when the :ref metadata
# entry is nil or false.
def query?
  !metadata[:ref]
end

#ref? ⇒ Boolean Also known as: is_ref?

Is this dataset a reference?

Returns:

  • (Boolean)


143
144
145
# File 'lib/miga/dataset.rb', line 143

# Is this dataset a reference? Defined as the negation of #query?.
def ref?
  !query?
end

#remove! ⇒ Object

Delete the dataset with all its contents (including results) and returns nil



105
106
107
108
109
# File 'lib/miga/dataset.rb', line 105

# Delete the dataset with all its contents: every result is removed first,
# then the metadata, and finally the :on_remove hook is pulled.
#
# NOTE(review): the return value is whatever pull_hook returns; confirm it
# is nil before relying on that.
def remove!
  results.each(&:remove!)
  metadata.remove!
  pull_hook :on_remove
end

#save ⇒ Object

Save any changes you’ve made in the dataset



88
89
90
91
92
# File 'lib/miga/dataset.rb', line 88

# Save any changes made in the dataset: logs a debug message, saves the
# metadata, and pulls the :on_save hook.
def save
  MiGA.DEBUG "Dataset.save: #{name}"
  metadata.save
  pull_hook :on_save
end

#save! ⇒ Object

Forces a save even if nothing has changed in the metadata



96
97
98
99
100
# File 'lib/miga/dataset.rb', line 96

# Force a save through the metadata's save! method: logs a debug message,
# calls save! on the metadata, and pulls the :on_save hook.
def save!
  MiGA.DEBUG "Dataset.save!: #{name}"
  metadata.save!
  pull_hook :on_save
end