Module: MiGA::Project::Dataset

Included in:
MiGA::Project
Defined in:
lib/miga/project/dataset.rb

Overview

Helper module including specific functions to handle datasets.

Instance Method Summary collapse

Instance Method Details

#add_dataset(name) ⇒ Object

Add dataset identified by name and return MiGA::Dataset.



49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/miga/project/dataset.rb', line 49

# Register the dataset identified by +name+ in this project, unless that name
# is already listed in the project metadata, and return the result of
# #dataset for +name+.
#
# For a newly registered name the project is saved, tasks are recalculated
# when the new dataset is an active reference dataset, and the
# +:on_add_dataset+ hook is pulled. Nothing of this happens for a name that
# is already registered.
#
# NOTE(review): the registered names are read through the +metadata+ reader,
# which is assumed to expose the same object as +@metadata+ -- confirm.
def add_dataset(name)
  unless metadata[:datasets].include? name
    d = MiGA::Dataset.new(self, name)
    @metadata[:datasets] << name
    @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
    save
    if d.ref? && d.active?
      recalculate_tasks("Reference dataset added: #{d.name}")
    end
    pull_hook(:on_add_dataset, name)
  end
  dataset(name)
end

#dataset(name) ⇒ Object

Returns MiGA::Dataset



28
29
30
31
32
33
34
35
# File 'lib/miga/project/dataset.rb', line 28

# Look up the dataset called +name+ (after normalizing it with +miga_name+).
#
# Returns +nil+ when MiGA::Dataset.exist? reports that no such dataset exists
# in this project. Otherwise returns a MiGA::Dataset, which is built once per
# normalized name and then reused from the +@datasets+ cache.
def dataset(name)
  key = name.miga_name
  if MiGA::Dataset.exist?(self, key)
    @datasets ||= {}
    @datasets[key] ||= MiGA::Dataset.new(self, key)
  else
    nil
  end
end

#dataset_names ⇒ Object

Returns Array of String (without evaluating dataset objects).



15
16
17
# File 'lib/miga/project/dataset.rb', line 15

# Names of the datasets registered in the project metadata, returned without
# instantiating any dataset object.
#
# NOTE(review): this returns the very collection stored under +:datasets+ in
# the metadata (not a copy), so mutating the result mutates the metadata.
def dataset_names
  metadata[:datasets]
end

#dataset_names_hash ⇒ Object

Returns Hash of Strings => true. Similar to dataset_names but as Hash for efficiency.



22
23
24
# File 'lib/miga/project/dataset.rb', line 22

# Lookup table with every entry of #dataset_names as key and +true+ as value.
#
# The Hash is built on first use and memoized in +@dataset_names_hash+; it is
# rebuilt only after that variable is reset to +nil+.
def dataset_names_hash
  @dataset_names_hash ||=
    dataset_names.each_with_object({}) { |ds_name, lookup| lookup[ds_name] = true }
end

#datasets ⇒ Object

Returns Array of MiGA::Dataset.



9
10
11
# File 'lib/miga/project/dataset.rb', line 9

# Array with the result of #dataset for every dataset name registered in the
# project metadata, in registration order.
def datasets
  metadata[:datasets].map { |name| dataset(name) }
end

#done_preprocessing?(save = false) ⇒ Boolean

Are all the datasets in the project preprocessed? Save intermediate results if save (until the first incomplete dataset is reached).

Returns:

  • (Boolean)


143
144
145
146
147
# File 'lib/miga/project/dataset.rb', line 143

# Are all the datasets in the project preprocessed?
#
# A dataset only counts against the answer when it is a reference dataset, it
# is active, and its own +done_preprocessing?+ is falsy. Evaluation stops at
# the first such dataset. +save+ is forwarded unchanged to each dataset's
# +done_preprocessing?+.
#
# Returns a Boolean.
def done_preprocessing?(save = false)
  each_dataset.all? do |ds|
    !ds.ref? || !ds.active? || ds.done_preprocessing?(save)
  end
end

#each_dataset(&blk) ⇒ Object

Iterate through datasets (MiGA::Dataset)



39
40
41
42
43
44
45
# File 'lib/miga/project/dataset.rb', line 39

# Iterate through the registered datasets, passing the result of #dataset for
# each registered name to +blk+.
#
# Without a block, returns an Enumerator over the same sequence.
def each_dataset(&blk)
  if block_given?
    metadata[:datasets].each { |name| blk.call(dataset(name)) }
  else
    to_enum(:each_dataset)
  end
end

#each_dataset_profile_advance(&blk) ⇒ Object

Call blk passing the result of MiGA::Dataset#profile_advance for each registered dataset.



165
166
167
# File 'lib/miga/project/dataset.rb', line 165

# Walk every dataset yielded by #each_dataset and hand the value of its
# +profile_advance+ to +blk+, one call per dataset.
def each_dataset_profile_advance(&blk)
  each_dataset do |entry|
    blk.call(entry.profile_advance)
  end
end

#import_dataset(ds, method = :hardlink) ⇒ Object

Import the dataset ds, a MiGA::Dataset, using method which is any method supported by File#generic_transfer.



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/miga/project/dataset.rb', line 81

# Import the dataset +ds+ (a MiGA::Dataset living in another project) into
# this project and register it with #add_dataset, whose result is returned.
#
# +method+ is handed as-is to File.generic_transfer for every transferred
# file. Raises a RuntimeError if a dataset with the same name already exists
# in this project.
def import_dataset(ds, method = :hardlink)
  if MiGA::Dataset.exist?(self, ds.name)
    raise "Impossible to import dataset, it already exists: #{ds.name}."
  end

  ds.each_result do |task, result|
    # Destination of +basename+ in this project's folder for +task+. Kept
    # lazy so the folder is only resolved when a file is really transferred.
    target = lambda do |basename|
      File.join(path, 'data', MiGA::Dataset.RESULT_DIRS[task], basename)
    end

    # Files registered in the result
    result.each_file do |file|
      File.generic_transfer(File.join(result.dir, file), target.call(file), method)
    end

    # Result bookkeeping files (only those that exist)
    %w(json start done).each do |suffix|
      basename = "#{ds.name}.#{suffix}"
      origin = File.join(result.dir, basename)
      next unless File.exist? origin

      File.generic_transfer(origin, target.call(basename), method)
    end
  end

  # Dataset metadata
  metadata_file = "#{ds.name}.json"
  File.generic_transfer(
    File.join(ds.project.path, 'metadata', metadata_file),
    File.join(path, 'metadata', metadata_file),
    method
  )

  # Register the dataset in this project
  add_dataset(ds.name)
end

#profile_datasets_advance ⇒ Object

Returns a two-dimensional matrix (Array of Array) where the first index corresponds to the dataset, the second index corresponds to the dataset task, and the value corresponds to:

  • 0: Before execution.

  • 1: Done (or not required).

  • 2: To do.



156
157
158
159
160
# File 'lib/miga/project/dataset.rb', line 156

# Collect into an Array, in iteration order, every value that
# #each_dataset_profile_advance yields (one entry per dataset).
def profile_datasets_advance
  [].tap do |matrix|
    each_dataset_profile_advance { |row| matrix << row }
  end
end

#unlink_dataset(name) ⇒ Object

Unlink dataset identified by name and return MiGA::Dataset.



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/miga/project/dataset.rb', line 65

# Unlink the dataset identified by +name+ from this project and return it.
#
# Returns +nil+, without touching anything, when #dataset finds no dataset
# for +name+. Otherwise the name is removed from the project metadata, the
# project is saved, tasks are recalculated when the dataset is an active
# reference dataset, and the +:on_unlink_dataset+ hook is pulled.
def unlink_dataset(name)
  d = dataset(name)
  return nil if d.nil?

  metadata[:datasets].delete(name)
  # Drop the memoized name lookup so it cannot keep listing the unlinked
  # dataset (same reset performed when a dataset is added).
  @dataset_names_hash = nil
  save
  if d.ref? && d.active?
    recalculate_tasks("Reference dataset unlinked: #{d.name}")
  end
  pull_hook(:on_unlink_dataset, name)
  d
end

#unregistered_datasets ⇒ Object

Find all datasets with (potential) result files but are yet unregistered.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/miga/project/dataset.rb', line 121

# Find all datasets with (potential) result files that are not yet
# registered in the project.
#
# Every existing result folder (+data/<dir>+ for each value of
# MiGA::Dataset.RESULT_DIRS) is scanned for files with a recognized
# extension. The candidate dataset name is the first run of characters
# without a dot in the file name; the name "miga-project" is ignored.
#
# Returns an Array of unique candidate names minus the names already listed
# in the project metadata.
def unregistered_datasets
  found = []
  MiGA::Dataset.RESULT_DIRS.values.each do |dir|
    dir_p = "#{path}/data/#{dir}"
    next unless Dir.exist? dir_p

    Dir.entries(dir_p).each do |file|
      next unless
        file =~ %r{
          \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
        }x

      m = /([^\.]+)/.match(file)
      found << m[1] unless m.nil? || m[1] == "miga-project"
    end
  end
  found.uniq - metadata[:datasets]
end