Module: Association

Defined in:
lib/rbbt/association.rb,
lib/rbbt/association/index.rb

Defined Under Namespace

Modules: Index

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.databases ⇒ Object

Returns the value of attribute databases.



7
8
9
# File 'lib/rbbt/association.rb', line 7

# Attribute reader: returns the current value of the receiver's @databases
# instance variable (nil if it has never been assigned).
def databases
  @databases
end

Class Method Details

.add_reciprocal(tsv) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/rbbt/association.rb', line 13

# Builds a plain Hash that contains every entry of +tsv+ plus, for each
# source => target association, the reverse target => source association
# carrying the same remaining field values.
#
# tsv - object traversed with #through (yielding key and values), indexed
#       with #[] and used to annotate the result via #annotate.
#
# Returns the new Hash (after passing it to tsv.annotate).
def self.add_reciprocal(tsv)
  reciprocal = {}

  tsv.with_unnamed do
    tsv.through do |source, values|
      # Keep the entry's own values unless a reverse entry was already stored.
      reciprocal[source] ||= values

      Misc.zip_fields(values).each do |target, *annotations|
        # Start from what is already known for the target: first the result
        # built so far, then the original tsv, finally empty columns.
        current = reciprocal[target] || tsv[target] || [[]] * values.length

        rows = Misc.zip_fields(current)
        rows << [source, *annotations]

        reciprocal[target] = Misc.zip_fields(rows)
      end
    end
  end

  tsv.annotate(reciprocal)

  reciprocal
end

.calculate_headers(key_field, fields, spec) ⇒ Object



56
57
58
59
60
61
62
63
64
65
# File 'lib/rbbt/association.rb', line 56

# Parses a field specification and returns [field, header, format].
#
# key_field - name of the key field (may be nil).
# fields    - list of the other field names (may be nil).
# spec      - specification handed to parse_field_specification; when nil
#             the result is [nil, nil, nil].
#
# NOTE(review): when all_fields is built it always contains key_field, so
# the resolve_field branch cannot trigger in that case; when all_fields is
# nil and field equals key_field the include? call fails on nil. Behaviour
# is kept as found - confirm the intended condition.
def self.calculate_headers(key_field, fields, spec)
  all_fields = (fields and key_field) ? [key_field] + fields : nil

  field = header = format = nil
  field, header, format = parse_field_specification(spec) if spec

  if field and key_field == field
    unless all_fields.include?(field)
      field, header, format = resolve_field(field, all_fields)
    end
  end

  [field, header, format]
end

.index(file, options = {}, persist_options = {}) ⇒ Object

# Opens index_file through Persist.open_tokyocabinet as a :list
# TokyoCabinet::BDB database, marks it as unnamed, sets it up as an
# Association::Index and returns it.
#
# index_file - path of the database file.
# write      - forwarded to Persist.open_tokyocabinet (default false).
def self.get_index(index_file, write = false)
  database = Persist.open_tokyocabinet(index_file, write, :list, TokyoCabinet::BDB)
  database.unnamed = true
  Association::Index.setup database
  database
end



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/rbbt/association.rb', line 263

# Builds an association index: a store whose keys are "source~target"
# strings and whose values are the remaining fields of each association.
#
# file            - a TSV, anything Association.open accepts, or nil. With
#                   nil only the store metadata is set up, from
#                   options[:key_field] and options[:fields].
# options         - options hash (nil is treated as {}); :persist defaults
#                   to true and :undirected adds "undirected" to the key
#                   field name. Also forwarded to Association.open.
# persist_options - merged over the default persistence options; :engine and
#                   :serializer are always forced (nil is treated as {}).
#
# Returns whatever Persist.persist_tsv returns, after passing it to
# Association::Index.setup.
# Raises RuntimeError when the TSV type is not :list, :flat or :double.
def self.index(file, options = {}, persist_options = {})
  options = Misc.add_defaults((options.nil? ? {} : options), :persist => true)
  persist_options = {} if persist_options.nil?

  index_persist_options = {:persist => true, :prefix => "Association Index"}
  index_persist_options = index_persist_options.merge(persist_options)
  index_persist_options = index_persist_options.merge(:engine => TokyoCabinet::BDB, :serializer => :clean)

  built = Persist.persist_tsv(file, nil, options, index_persist_options) do |assocs|
    undirected = options[:undirected]

    if file
      tsv = if TSV === file
              file
            else
              Association.open(file, options, persist_options.merge(:persist => false))
            end

      fields = tsv.fields

      # Key field name: "<source>~<target>[~undirected]"; only the part of the
      # target header after the last ":" is used.
      name_parts = [tsv.key_field, fields.first.split(":").last]
      name_parts << "undirected" if undirected
      key_field = name_parts.compact.join("~")

      TSV.setup(assocs, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)

      tsv.with_unnamed do
        tsv.with_monitor :desc => "Extracting associations" do
          case tsv.type
          when :list
            # One target per source; everything after it is the annotation.
            tsv.through do |source, values|
              target, *rest = values
              next if [source, target].any? { |part| part.nil? || part.empty? }

              assocs[[source, target].join("~")] = rest
            end
          when :flat
            # Several bare targets per source; no annotation to store.
            tsv.through do |source, targets|
              next if [source, targets].any? { |part| part.nil? || part.empty? }

              targets.each do |target|
                next if target.nil? || target.empty?

                assocs[[source, target].join("~")] = nil
              end
            end
          when :double
            # First column holds the targets; remaining columns are zipped
            # into one annotation row per target.
            tsv.through do |source, values|
              next if values.empty? || source.nil?

              targets = values.first
              rest = Misc.zip_fields values[1..-1]

              # With at most one annotation row, repeat it for every target.
              rest = rest * targets.length unless rest.length > 1

              targets.zip(rest).each do |target, info|
                next if target.nil?

                assocs[[source, target].join("~")] = info
              end
            end
          else
            raise "Type not supported: #{tsv.type}"
          end
        end
      end
    else
      key_field = options[:key_field]
      fields = options[:fields]
      TSV.setup(assocs, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
    end

    assocs.close

    assocs
  end

  Association::Index.setup built
  built
end

.load_tsv(file, options) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# File 'lib/rbbt/association.rb', line 203

# Loads a TSV of associations, with the source entity as key and the target
# entity as first field.
#
# file    - a Proc (called to obtain the real source and then processed
#           again), a TSV, or anything TSV.parse_header / open_tsv accept.
# options - options hash; :undirected is taken out here, the remaining keys
#           are handed to specs and open_tsv. The hash is modified in place.
#
# Steps: determine source/target fields and formats with specs, open the
# data with open_tsv, translate identifier formats with translate_tsv and,
# when :undirected is set, add the reverse associations with add_reciprocal.
#
# Returns the resulting TSV (or the Hash built by add_reciprocal).
def self.load_tsv(file, options)
  undirected = Misc.process_options options, :undirected

  case file
  when Proc
    # Misc.process_options is expected to have removed :undirected from
    # options; put it back so the nested call still sees the flag instead of
    # silently treating Proc sources as directed.
    options[:undirected] = undirected unless undirected.nil?
    return load_tsv(file.call, options)
  when TSV
    all_fields = file.all_fields
  else
    all_fields = TSV.parse_header(file, options.merge(:fields => nil, :key_field => nil)).all_fields
  end

  source, source_header, source_format, target, target_header, target_format, fields = specs(all_fields, options)

  Log.low("Loading associations from: #{ Misc.fingerprint file }")
  Log.low("sources: #{ [source, source_header, source_format].join(", ") }")
  Log.low("targets: #{ [target, target_header, target_format].join(", ") }")

  # open_tsv modifies the field list it receives, so hand it a copy.
  tsv = open_tsv(file, source, source_header, target, target_header, all_fields, options.merge(:fields => fields.dup))

  tsv = translate_tsv(tsv, source_format, target_format)

  tsv = add_reciprocal(tsv) if undirected

  tsv
end

.open(file, options = {}, persist_options = {}) ⇒ Object



230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/rbbt/association.rb', line 230

# Opens an association database through Persist.persist_tsv, loading the
# associations with load_tsv and copying them into the store handed to the
# block.
#
# file            - association source; for Strings the literal 'NAMESPACE'
#                   is replaced (first occurrence) by options[:namespace]
#                   when that option is given.
# options         - options hash (nil is treated as {}); a clone is passed
#                   to load_tsv.
# persist_options - merged over {:persist => true, :prefix => "Association"}
#                   (nil is treated as {}).
#
# Returns whatever Persist.persist_tsv returns.
def self.open(file, options = {}, persist_options = {})
  options = {} if options.nil?
  persist_options = {} if persist_options.nil?

  namespace = options[:namespace]

  old_file = nil
  if namespace and String === file
    old_file = file
    file = file.sub('NAMESPACE', namespace)
  end
  # When the original was a Path, let it annotate the substituted name.
  old_file.annotate file if Path === old_file

  defaults = {:persist => true, :prefix => "Association"}

  Persist.persist_tsv(file, nil, options, defaults.merge(persist_options)) do |data|
    tsv = load_tsv(file, options.clone)

    tsv.annotate(data)
    data.serializer = tsv.type if TokyoCabinet::HDB === data

    tsv.with_unnamed do
      tsv.each do |key, values|
        data[key] = values unless values.nil?
      end
    end

    data
  end
end

.open_tsv(file, source, source_header, target, target_header, all_fields, options) ⇒ Object

{{{ Open



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# File 'lib/rbbt/association.rb', line 69

# Opens (or reorders) a TSV so that +source+ is the key field and +target+
# is the first field, followed by the remaining requested fields.
#
# file          - a TSV (reused or reordered) or anything TSV.open accepts.
# source        - name of the field to use as key.
# source_header - name to give the key field afterwards (nil keeps source).
# target        - name of the field to place first.
# target_header - name to give the target field afterwards (nil keeps it).
# all_fields    - every field name of the file, key field included.
# options       - options for TSV.open; :fields is taken out of the hash and
#                 used as the list of fields to keep (default: all fields).
#                 :type => :flat (or "flat") opens the file as :flat without
#                 merging; anything else opens it as :double with merging.
#
# Returns the TSV with its key_field and fields renamed as requested.
def self.open_tsv(file, source, source_header, target, target_header, all_fields, options)
  fields = Misc.process_options options, :fields
  fields ||= all_fields.dup

  # Source becomes the key and target the first field; the rest keep order.
  fields.delete source
  fields.delete target
  fields.unshift target

  # Compare as strings: a String never equals the Symbol :flat, which used
  # to make this check always false and forced every file to :double.
  flat = options[:type].to_s == "flat"

  open_options = options.merge({
    :persist => false,
    :key_field => all_fields.index(source),
    :fields => fields.collect{|f| String === f ? all_fields.index(f) : f },
    :type => flat ? :flat : :double,
    :merge => !flat
  })

  # Preserve first line, which would have been considered a header otherwise
  open_options["header_hash"] = "#" if options["header_hash"] == ""

  field_headers = all_fields.values_at(*open_options[:fields])

  tsv = case file
        when TSV
          if file.fields == field_headers
            file
          else
            file.reorder(source, field_headers)
          end
        else
          TSV.open(file, open_options)
        end

  tsv.fields = field_headers
  tsv.key_field = source

  # Fix source header
  if source_header and tsv.key_field != source_header
    tsv.key_field = source_header
  end

  # Fix target header
  if target_header and tsv.fields.first != target_header
    tsv.fields = tsv.fields.collect{|f| f == target ? target_header : f }
  end

  tsv
end

.parse_field_specification(spec) ⇒ Object



46
47
48
49
50
51
52
53
54
# File 'lib/rbbt/association.rb', line 46

# Splits a field specification into its three parts.
#
# spec - one of:
#        * a String "field=~header=>final_format", where the "=~header" and
#          "=>final_format" parts are optional;
#        * an Array [field_part, final_format], field_part being a String
#          that may itself contain "=~header";
#        * an Integer, for which [2, nil, nil] is returned.
#          NOTE(review): the integer itself is ignored and 2 is always
#          returned; kept as found - confirm this is intended.
#
# Returns [field, header, final_format]; missing parts are nil.
def self.parse_field_specification(spec)
  # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and no
  # longer exists in Ruby 3.2+, where referencing it raises NameError.
  return [2, nil, nil] if Integer === spec

  spec = spec.split "=>" unless Array === spec
  field_part, final_format = spec

  field, header = field_part.split "=~"

  [field, header, final_format]
end

.resolve_field(name, fields) ⇒ Object



39
40
41
42
43
44
# File 'lib/rbbt/association.rb', line 39

# Finds the field in +fields+ that holds the same entity type as the format
# +name+, according to Entity.formats.
#
# name   - format name to resolve.
# fields - candidate field names.
#
# Returns [field, nil, name], where field is the first candidate whose
# entity type equals that of name (nil when no candidate matches).
# Raises RuntimeError when name is not a known format. This used to return
# the error message as a String, which callers then destructured as if it
# were a field name.
def self.resolve_field(name, fields)
  entity_type = Entity.formats[name]
  raise "Field #{ name } could not be resolved: #{fields}" if entity_type.nil?

  field = fields.find{|f| Entity.formats[f] == entity_type }
  [field, nil, name]
end

.specs(all_fields, options = {}) ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# File 'lib/rbbt/association.rb', line 150

def self.specs(all_fields, options = {})
  source_spec, source_format, target_spec, target_format, format, key_field, fields = Misc.process_options options, :source, :source_format, :target, :target_format, :format, :key_field, :fields

  if key_field and all_fields
    key_pos = (Fixnum === key_field ? key_field : all_fields.index(key_field) )
    key_field = all_fields[key_pos]
  else
    key_field = all_fields.first if all_fields
  end

  if fields and all_fields
    field_pos = fields.collect{|f| Fixnum === f ? f : all_fields.index(f) }
    fields = all_fields.values_at *field_pos
  else
    #fields = all_fields[1..-1] if all_fields
  end

  source, source_header, orig_source_format = calculate_headers(key_field, fields || all_fields, source_spec)
  source_format ||= orig_source_format 
  source = key_field if source.nil? 
  source = key_field if source == :key
  source_header ||= source

  target, target_header, orig_target_format = calculate_headers(key_field, fields || all_fields, target_spec)
  target_format ||= orig_target_format 
  if target.nil?
    target = case
             when fields
               fields.first
             when key_field == source
               all_fields[1]
             else
               (([key_field] + all_fields) - [source]).first
             end
  end

  target = key_field if target == :key
  target_header ||= target

  case format
  when String
    source_format ||= format if Entity.formats[source_header] == Entity.formats[format]
    target_format ||= format if Entity.formats[target_header] == Entity.formats[format]
  when Hash
    _type = Entity.formats[source_header].to_s
    source_format ||= format[_type] if format.include? _type 
    _type = Entity.formats[target_header].to_s
    target_format ||= format[_type] if format.include? _type 
  end

  [source, source_header, source_format, target, target_header, target_format, fields || all_fields]
end

.translate_tsv(tsv, source_final_format, target_final_format) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/rbbt/association.rb', line 117

# Translates the key field (source) and the first field (target) of +tsv+
# to the requested identifier formats.
#
# tsv                 - TSV whose key_field is the source and whose first
#                       field is the target.
# source_final_format - format wanted for the source, or nil to leave it.
# target_final_format - format wanted for the target, or nil to leave it.
#
# The source is changed only when its current field is a known
# Entity.formats entry listed in the all_formats of the requested format;
# the target only when both formats map to the same Entity.formats entry.
#
# NOTE(review): the source check calls all_formats on
# Entity.formats[source_final_format] without checking it for nil, unlike
# the target check - confirm whether unknown formats can reach this point.
#
# Returns the (possibly replaced) TSV.
def self.translate_tsv(tsv, source_final_format, target_final_format)
  source_field = tsv.key_field
  target_field = tsv.fields.first

  change_source = source_final_format &&
    source_field != source_final_format &&
    Entity.formats[source_field] &&
    Entity.formats[source_final_format].all_formats.include?(source_field)

  if change_source
    Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")

    tsv.with_unnamed do
      tsv = tsv.change_key source_final_format, :identifiers => Organism.identifiers(tsv.namespace), :persist => true
    end
  end

  # Translate target
  change_target = target_final_format &&
    target_field != target_final_format &&
    Entity.formats[target_field] &&
    Entity.formats[target_field] == Entity.formats[target_final_format]

  if change_target
    Log.debug("Changing target format from #{tsv.fields.first} to #{target_final_format}")

    # The key field name is replaced by a placeholder while the target is
    # swapped and restored on the resulting TSV afterwards.
    original_key_field = tsv.key_field
    tsv.key_field = "MASKED"

    tsv.with_unnamed do
      tsv = tsv.swap_id tsv.fields.first, target_final_format, :identifiers => Organism.identifiers(tsv.namespace), :persist => true
    end

    tsv.key_field = original_key_field
  end

  tsv
end