Class: MiGA::Dataset

Inherits:
MiGA
  • Object
show all
Includes:
Common::WithOption, Hooks, Result, Status
Defined in:
lib/miga/dataset.rb,
lib/miga/dataset/base.rb

Overview

Dataset representation in MiGA

Defined Under Namespace

Modules: Base, Hooks, Result, Status

Constant Summary

Constants included from MiGA

CITATION, VERSION, VERSION_DATE, VERSION_NAME

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Hooks

#default_hooks, #hook__pull_result_hooks, #hook_clear_run_counts, #hook_recalculate_status, #hook_run_cmd

Methods included from Common::Hooks

#add_hook, #default_hooks, #hook_run_lambda, #hooks, #pull_hook

Methods included from Status

#recalculate_status, #status

Methods included from Result

#cleanup_distances!, #done_preprocessing?, #first_preprocessing, #ignore_task?, #next_preprocessing, #profile_advance, #result_base, #result_status, #results_status, #why_ignore

Methods included from Common::WithResult

#add_result, #each_result, #get_result, #next_task, #recalculate_tasks, #result, #result_dirs, #results

Methods included from Common::WithOption

#all_options, #assert_has_option, #assert_valid_option_value, #option, #option?, #option_by_default, #option_by_metadata, #option_from_string, #set_option

Methods inherited from MiGA

CITATION, CITATION_ARRAY, DEBUG, DEBUG_OFF, DEBUG_ON, DEBUG_TRACE_OFF, DEBUG_TRACE_ON, FULL_VERSION, LONG_VERSION, VERSION, VERSION_DATE, #advance, debug?, debug_trace?, initialized?, #like_io?, #num_suffix, rc_path, #result_files_exist?, #say

Methods included from Common::Path

#root_path, #script_path

Methods included from Common::Format

#clean_fasta_file, #seqs_length, #tabulate

Methods included from Common::Net

#download_file_ftp, #known_hosts, #remote_connection

Methods included from Common::SystemCall

#run_cmd, #run_cmd_opts

Constructor Details

#initialize(project, name, is_ref = true, metadata = {}) ⇒ Dataset

Create a MiGA::Dataset object in a project MiGA::Project with a uniquely identifying name. is_ref indicates if the dataset is to be treated as reference (true, default) or query (false). Pass any additional metadata as a Hash.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/miga/dataset.rb', line 53

# Create a dataset called +name+ inside +project+.
#
# project:  object exposing +path+; the metadata file lives in its
#           +metadata+ subdirectory as "<name>.json"
# name:     dataset identifier, must satisfy +miga_name?+
# is_ref:   value stored under the +:ref+ metadata key
# metadata: Hash with any additional metadata. The +:ref+ entry is written
#           into this very Hash, so the caller's object is modified
#
# Raises RuntimeError if +name+ does not satisfy +miga_name?+
def initialize(project, name, is_ref = true, metadata = {})
  unless name.miga_name?
    raise "Invalid name, please use only alphanumerics and underscores: #{name}"
  end

  @project, @name, @metadata = project, name, nil
  metadata[:ref] = is_ref
  # Arguments for MiGA::Metadata.new; the object itself is built on demand
  @metadata_future = [
    File.join(project.path, 'metadata', "#{name}.json"),
    metadata
  ]
  # When the metadata file is already on disk, skip the initial save and the
  # creation hook
  return if File.exist? @metadata_future.first

  save
  pull_hook :on_create
end

Instance Attribute Details

#name ⇒ Object (readonly)

Datasets are uniquely identified by name in a project



46
47
48
# File 'lib/miga/dataset.rb', line 46

# Reader for the dataset name held in @name.
def name
  @name
end

#project ⇒ Object (readonly)

MiGA::Project that contains the dataset



42
43
44
# File 'lib/miga/dataset.rb', line 42

# Reader for the project object held in @project.
def project
  @project
end

Class Method Details

.exist?(project, name) ⇒ Boolean

Does the project already have a dataset with that name?

Returns:

  • (Boolean)


27
28
29
# File 'lib/miga/dataset.rb', line 27

# Report whether +project+ already lists a dataset called +name+: true when
# looking +name+ up in +project.dataset_names_hash+ yields anything but nil.
def exist?(project, name)
  registered = project.dataset_names_hash[name]
  !registered.nil?
end

.INFO_FIELDS ⇒ Object

Standard fields of metadata for datasets



33
34
35
# File 'lib/miga/dataset.rb', line 33

# Names of the standard dataset metadata fields, as an Array of Strings.
# A fresh Array is built on every call.
def INFO_FIELDS
  %w[
    name created updated type
    ref user description comments
  ]
end

.KNOWN_TYPES ⇒ Object



14
15
16
# File 'lib/miga/dataset/base.rb', line 14

# Reader for the @@KNOWN_TYPES class variable.
def KNOWN_TYPES
  @@KNOWN_TYPES
end

.OPTIONS ⇒ Object



22
23
24
# File 'lib/miga/dataset/base.rb', line 22

# Reader for the @@OPTIONS class variable.
def OPTIONS
  @@OPTIONS
end

.PREPROCESSING_TASKS ⇒ Object



18
19
20
# File 'lib/miga/dataset/base.rb', line 18

# Reader for the @@PREPROCESSING_TASKS class variable.
def PREPROCESSING_TASKS
  @@PREPROCESSING_TASKS
end

.RESULT_DIRS ⇒ Object



10
11
12
# File 'lib/miga/dataset/base.rb', line 10

# Reader for the @@RESULT_DIRS class variable.
def RESULT_DIRS
  @@RESULT_DIRS
end

Instance Method Details

#activate! ⇒ Object

Activate a dataset. This removes the :inactive flag



121
122
123
124
125
126
127
# File 'lib/miga/dataset.rb', line 121

# Activate the dataset: clear the :inactive metadata flag, drop a :warn entry
# that begins a line with "Inactive: ", save the metadata, ask the project to
# recalculate its tasks when the dataset is a reference, and fire the
# :on_activate hook.
def activate!
  metadata[:inactive] = nil
  # Any other warning is left in place
  metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
  metadata.save
  project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
  pull_hook :on_activate
end

#active? ⇒ Boolean (also known as: is_active?)

Is this dataset active?

Returns:

  • (Boolean)


167
168
169
# File 'lib/miga/dataset.rb', line 167

# Is the dataset active? True when the :inactive metadata entry is nil or
# false, false for any other value.
def active?
  !metadata[:inactive]
end

#closest_relatives(how_many = 1, ref_project = false) ⇒ Object

Returns an Array of how_many duples (Arrays) sorted by AAI:

  • 0: A String with the name(s) of the reference dataset.

  • 1: A Float with the AAI.

This function is currently only supported for query datasets when ref_project is false (default), and only for reference datasets when ref_project is true. It returns nil if this analysis is not supported.



198
199
200
201
202
203
204
205
206
207
208
209
# File 'lib/miga/dataset.rb', line 198

# Query the AAI database of this dataset for its best matches.
#
# Returns nil when +ref?+ differs from +ref_project+, when the dataset is
# +multi?+, or when the required result (:taxonomy if +ref_project+,
# :distances otherwise) is missing. Otherwise returns whatever
# MiGA::SQLite#run yields for a query selecting seq2 and aai, excluding rows
# whose seq2 is this dataset's name, ordered by descending aai and limited to
# +how_many+ rows.
def closest_relatives(how_many = 1, ref_project = false)
  return nil if ref? != ref_project
  return nil if multi?

  task = ref_project ? :taxonomy : :distances
  source = result(task)
  return nil if source.nil?

  # Loaded only once it is actually needed
  require 'miga/sqlite'
  query = 'SELECT seq2, aai FROM aai WHERE seq2 != ? ' \
          'GROUP BY seq2 ORDER BY aai DESC LIMIT ?'
  MiGA::SQLite.new(source.file_path(:aai_db)).run(query, [name, how_many])
end

#inactivate!(reason = nil) ⇒ Object

Inactivate a dataset. This halts automated processing by the daemon

If given, the reason string is saved as a metadata :warn entry



111
112
113
114
115
116
117
# File 'lib/miga/dataset.rb', line 111

# Inactivate the dataset: set the :inactive metadata flag to true, save the
# metadata, ask the project to recalculate its tasks when the dataset is a
# reference, and fire the :on_inactivate hook.
#
# reason: when not nil, stored as "Inactive: <reason>" under the :warn
#         metadata key; when nil, any existing :warn entry is left untouched
def inactivate!(reason = nil)
  metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
  metadata[:inactive] = true
  metadata.save
  project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
  pull_hook :on_inactivate
end

#info ⇒ Object

Get standard metadata values for the dataset as Array



131
132
133
134
135
# File 'lib/miga/dataset.rb', line 131

# Values of the standard fields listed by MiGA::Dataset.INFO_FIELDS, in that
# order, as an Array. The 'name' field comes from +name+; every other field
# is looked up in the metadata.
def info
  MiGA::Dataset.INFO_FIELDS.map do |k|
    k == 'name' ? name : metadata[k]
  end
end

#metadata ⇒ Object

MiGA::Metadata with information about the dataset



71
72
73
74
75
76
77
# File 'lib/miga/dataset.rb', line 71

# MiGA::Metadata object of the dataset. It is built on the first call from
# the arguments kept in @metadata_future, after which the :on_load hook is
# fired; later calls return the same object without firing the hook again.
def metadata
  if @metadata.nil?
    # Assign before pulling the hook, so @metadata is already set when the
    # hook runs
    @metadata = MiGA::Metadata.new(*@metadata_future)
    pull_hook :on_load
  end
  @metadata
end

#multi? ⇒ Boolean (also known as: is_multi?)

Is this dataset known to be multi-organism?

Returns:

  • (Boolean)


151
152
153
154
155
# File 'lib/miga/dataset.rb', line 151

# Is the dataset known to be multi-organism? Returns false when the metadata
# has no :type or when the type has no entry in @@KNOWN_TYPES; otherwise
# returns the :multi value registered for that type.
def multi?
  return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?

  @@KNOWN_TYPES[type][:multi]
end

#nonmulti? ⇒ Boolean (also known as: is_nonmulti?)

Is this dataset known to be single-organism?

Returns:

  • (Boolean)


159
160
161
162
163
# File 'lib/miga/dataset.rb', line 159

# Is the dataset known to be single-organism? Returns false when the metadata
# has no :type or when the type has no entry in @@KNOWN_TYPES (an unknown
# type is neither multi nor non-multi); otherwise returns the negation of the
# :multi value registered for that type.
def nonmulti?
  return false if metadata[:type].nil? || @@KNOWN_TYPES[type].nil?

  !@@KNOWN_TYPES[type][:multi]
end

#query? ⇒ Boolean (also known as: is_query?)

Is this dataset a query (non-reference)?

Returns:

  • (Boolean)


145
146
147
# File 'lib/miga/dataset.rb', line 145

# Is the dataset a query (non-reference)? True when the :ref metadata entry
# is nil or false.
def query?
  !metadata[:ref]
end

#ref? ⇒ Boolean (also known as: is_ref?)

Is this dataset a reference?

Returns:

  • (Boolean)


139
140
141
# File 'lib/miga/dataset.rb', line 139

# Is the dataset a reference? Defined as the negation of +query?+.
def ref?
  !query?
end

#remove! ⇒ Object

Delete the dataset with all its contents (including results) and return nil



101
102
103
104
105
# File 'lib/miga/dataset.rb', line 101

# Delete the dataset: remove every result first, then the metadata, and
# finally fire the :on_remove hook.
def remove!
  results.each(&:remove!)
  metadata.remove!
  pull_hook :on_remove
end

#save ⇒ Object (also known as: save!)

Save any changes you’ve made in the dataset



81
82
83
84
85
# File 'lib/miga/dataset.rb', line 81

# Save the dataset: emit a debug message with the metadata contents, save
# the metadata, and fire the :on_save hook.
def save
  MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
  metadata.save
  pull_hook :on_save
end

#type ⇒ Object

Get the type of dataset as Symbol



94
95
96
# File 'lib/miga/dataset.rb', line 94

# Type of the dataset, as stored under the :type metadata key (nil when the
# key is unset).
def type
  metadata[:type]
end