Module: TSV

Defined in:
lib/rbbt/statistics/random_walk.rb,
lib/rbbt/statistics/rank_product.rb,
lib/rbbt/statistics/hypergeometric.rb

Class Method Summary

Instance Method Summary

Class Method Details

.rank_enrichment(tsv, list, options = {}) ⇒ Object



497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
# File 'lib/rbbt/statistics/random_walk.rb', line 497

# Computes a rank-based enrichment p-value for every entry of +tsv+ against
# the ranked +list+.
#
# tsv     - TSV whose values are the member sets to test.
# list    - ranked list; when it responds to clean_annotations the cleaned
#           version is used.
# options - :masked lists keys to skip and :fdr triggers FDR.adjust_hash!;
#           the whole hash is also forwarded to rank_enrichment_for_list.
#
# Returns a TSV mapping each tested key to [p-value, matched members]. Entries
# with fewer than two members present in +list+ are left out.
def self.rank_enrichment(tsv, list, options = {})
  masked = options[:masked]

  res =
    if tsv.fields
      TSV.setup({}, :cast => :to_f, :type => :double, :key_field => tsv.key_field, :fields => ["p-value", tsv.fields.first])
    else
      TSV.setup({}, :cast => :to_f, :type => :double)
    end

  list = list.clean_annotations if list.respond_to?(:clean_annotations)

  tsv.with_monitor(:desc => "Rank enrichment") do
    tsv.with_unnamed do
      tsv.through do |key, values|
        next if masked && masked.include?(key)

        # Work on a flat list of non-nil, non-empty members.
        members = values.flatten.compact.reject(&:empty?)

        overlap =
          if members.respond_to?(:subset)
            members.subset(list)
          else
            members & list
          end
        overlap = overlap.compact
        next if overlap.length < 2

        res[key] = [rank_enrichment_for_list(list, members, options), overlap]
      end
    end
  end

  FDR.adjust_hash!(res, 0) if options[:fdr]

  res
end

.rank_enrichment_for_list(list, hits, options = {}) ⇒ Object



487
488
489
490
491
492
493
494
495
# File 'lib/rbbt/statistics/random_walk.rb', line 487

# Asks the ranked +list+ for the p-value of +hits+.
#
# list    - ranked list; it is extended in place with OrderedList.
# hits    - elements whose positions in +list+ are evaluated.
# options - :cutoff is passed as the second argument of list.pvalue (nil when
#           it is unset or false); the full hash is passed as the third.
#
# Returns whatever list.pvalue returns.
def self.rank_enrichment_for_list(list, hits, options = {})
  threshold = options[:cutoff] || nil
  list.extend OrderedList
  list.pvalue(hits, threshold, options)
end

Instance Method Details

#annotation_counts(fields = nil, persistence = false, options = {}) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# File 'lib/rbbt/statistics/hypergeometric.rb', line 113

# Counts, for each value found in the given fields, how many distinct keys
# carry it.
#
# fields      - field name or list of names; defaults to all fields of this
#               TSV. Names that are not fields of this TSV are ignored.
# persistence - forwarded to Persist.persist as :persist.
# options     - :rename, an optional mapping applied to keys before they are
#               counted. The entry is removed from the hash that was passed in.
#
# Returns a Hash of value => number of distinct (renamed) keys, as produced
# through Persist.persist.
def annotation_counts(fields = nil, persistence = false, options = {})
  fields ||= self.fields
  fields = [fields] if String === fields or Symbol === fields
  rename = options.delete :rename

  field_pos = fields.collect{|f| self.fields.index f}.compact
  persistence_path = self.respond_to?(:persistence_path) ? self.persistence_path : nil
  Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts", :other => {:rename => rename, :persistence_path => persistence_path}) do
    # value => keys in which the value was seen
    data = {}

    with_unnamed do
      case type
      when :single
        through :key, field_pos do |key, value|
          next if value.nil?
          (data[value] ||= []) << key
        end
      when :double
        through :key, field_pos do |key, values|
          values.flatten.compact.uniq.each{|value| (data[value] ||= []) << key}
        end
      when :list, :flat
        through :key, field_pos do |key, values|
          next if values.nil?
          values.compact.uniq.each{|value| (data[value] ||= []) << key}
        end
      end
    end

    Log.debug("Using renames during annotation counts") if rename

    # Build the result one pair at a time. Splatting the flattened pairs into
    # Hash[] passes every key and count as a method argument and loses the
    # pair boundaries whenever a value is itself an array.
    counts = {}
    data.each do |value, members|
      members = members.collect{|e| rename.include?(e) ? rename[e] : e } if rename
      counts[value] = members.uniq.length
    end
    counts
  end
end

#enrichment(list, fields = nil, options = {}) ⇒ Object



159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/rbbt/statistics/hypergeometric.rb', line 159

# Hypergeometric enrichment of the annotations in +fields+ among the keys in
# +list+.
#
# list    - collection of keys; nils and duplicates are dropped before use.
# fields  - field(s) to analyse; defaults to the first field of this TSV.
# options - values shown are the defaults handed to Misc.add_defaults:
#           :background   (nil)   when a non-empty Array, a :key filter on it
#                                 is added and +list+ is restricted to it.
#           :skip_missing (true)  use the number of list keys found in this
#                                 TSV as sample size instead of list.length.
#           :min_support  (3)     annotations matched by fewer keys are skipped.
#           :fdr          (true)  run the p-values through FDR.adjust_hash!.
#           :cutoff       (false) when set, p-values above it are removed.
#           :add_keys     (true)  also report the matching keys (see Returns).
#           :rename               mapping applied to matching keys before they
#                                 are counted; also given to annotation_counts.
#           :masked               annotations to leave out; also given to
#                                 annotation_counts.
#           :persist              passed as the persistence flag of
#                                 annotation_counts.
#
# Returns a TSV keyed by annotation: with :add_keys a :double TSV holding a
# 'p-value' field and a field named after this TSV's key field, otherwise a
# :single TSV of p-values.
def enrichment(list, fields = nil, options = {})
  options = Misc.add_defaults options, :skip_missing => true, :background => nil
  # NOTE(review): presumably process_options extracts these entries from
  # options -- confirm against Misc.
  background, skip_missing = Misc.process_options options, :background, :skip_missing

  list = list.compact.uniq

  # With a background, register a :key filter for it and keep only the list
  # entries that belong to it. The filter is undone further down, after the
  # selection and the counts have been taken.
  if Array === background and not background.empty?
    filter
    add_filter(:key, background)
    if defined? AnnotatedArray and AnnotatedArray === list
      list = list.subset background
    else
      list = list & background
    end
  end

  with_unnamed do
    fields ||= self.fields.first
    options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true

    add_keys, rename, masked = Misc.process_options options, :add_keys, :rename, :masked

    Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"

    # Entries of this TSV whose key is in the list.
    selected = select :key => list.uniq

    # Both sizes are read before the background filter is removed below.
    tsv_size = keys.length
    found = selected.keys.length
    Log.debug "Found #{found} of #{list.length} entities"

    # Sample size for the test.
    if skip_missing
      total = found
      Log.debug "Using #{ found } as sample size; skipping missing"
    else
      total = list.length
      Log.debug "Using #{ list.length } as sample size"
    end

    # Per-annotation key counts over this TSV.
    counts = annotation_counts fields, options[:persist], :rename => rename, :masked => masked

    # annotation => keys of the selection that carry it. Each branch handles
    # one TSV type; every annotation is dup'ed before being used as a hash key.
    annotation_keys = Hash.new
    selected.with_unnamed do

      case type
      when :single
        selected.through :key, fields do |key, value|
          value = value.dup
          annotation_keys[value] ||= []
          annotation_keys[value] << key
        end

      when :double
        selected.through :key, fields do |key, values|
          values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end

      when :list
        selected.through :key, fields do |key, values|
          values.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end

      when :flat
        # Unlike the other branches, this one iterates without restricting
        # the traversal to +fields+.
        selected.through do |key, values|
          next if values.nil?
          values.compact.uniq.reject{|value| value.empty?}.each{|value| 
            value = value.dup
            annotation_keys[value] ||= []
            annotation_keys[value] << key
          }
        end
      end
    end

    # Undo the background filter registered at the top.
    if Array === background and not background.empty?
      reset_filters
      pop_filter
    end

    pvalues = {}
    annotation_keys.each do |annotation, elems|
      next if masked and masked.include? annotation
      # Renaming can collapse several keys into one, so count after it.
      elems = elems.collect{|elem| rename.include?(elem)? rename[elem] : elem }.compact.uniq if rename
      count = elems.length
      next if count < options[:min_support] or not counts.include? annotation
      #pvalues[annotation] = RSRuby.instance.phyper(count - 1, counts[annotation], tsv_size - counts[annotation], total, false).to_f
      # Arguments: keys of the selection with the annotation, keys of the TSV
      # with it, keys of the TSV without it, sample size.
      pvalues[annotation] = Hypergeometric.hypergeometric(count, counts[annotation], tsv_size - counts[annotation], total)
    end

    pvalues = FDR.adjust_hash! pvalues if options[:fdr]

    pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]

    if add_keys
      # Start from the annotations alone and attach the two result fields.
      tsv = TSV.setup(pvalues.keys.collect{|k| k.dup}, :key_field => fields, :fields => [], :type => :double)

      tsv.add_field 'p-value' do |annot, values|
        [pvalues[annot]]
      end

      tsv.add_field self.key_field do |annot, values|
        # When the list can annotate, the matching keys are passed through
        # list.annotate.
        if list.respond_to? :annotate
          list.annotate annotation_keys[annot]
        else
          annotation_keys[annot]
        end
      end

      tsv
    else
      TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
    end

  end
end

#enrichment_for(tsv, field, options = {}) ⇒ Object



282
283
284
285
286
287
288
289
290
291
# File 'lib/rbbt/statistics/hypergeometric.rb', line 282

# Runs an enrichment analysis on +tsv+ for this TSV's keys, after mapping them
# through the index returned by TSV.find_traversal.
#
# tsv     - target TSV, or a Path that is loaded through its tsv method.
# field   - field of +tsv+ to analyse.
# options - forwarded untouched to tsv.enrichment.
#
# Returns the result of tsv.enrichment. Raises RuntimeError when
# TSV.find_traversal yields no index.
def enrichment_for(tsv, field, options = {})
  target = Path === tsv ? tsv.tsv : tsv

  translation = TSV.find_traversal(self, target, :in_namespace => false, :persist_input => true)
  raise "Cannot traverse identifiers" if translation.nil?

  translated = translation.values_at(*self.keys)
  target.enrichment(translated.flatten.compact.uniq, field, options)
end

#rank_enrichment(list, options = {}) ⇒ Object



524
525
526
# File 'lib/rbbt/statistics/random_walk.rb', line 524

# Instance-level shortcut for TSV.rank_enrichment with this object as the TSV
# argument.
#
# list    - list handed on to TSV.rank_enrichment.
# options - options hash handed on unchanged.
#
# Returns whatever TSV.rank_enrichment returns.
def rank_enrichment(list, options = {})
  TSV.rank_enrichment self, list, options
end

#rank_product(fields, reverse = false, &block) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rbbt/statistics/rank_product.rb', line 43

# Scores every key by combining its rank in each of the given fields.
#
# fields  - list of field names; one ranking is built per field.
# reverse - when true, positions count down from this TSV's size instead of
#           up from 1.
# block   - optional block handed to sort_by for each field; without it the
#           ranking uses the field value converted with to_f (the first
#           flattened value for non-:single TSVs).
#
# Returns the result of RankProduct.score, called with a Hash of
# key => positions (one per field) and the number of non-empty values of
# each field.
def rank_product(fields, reverse = false, &block)
  tsv = self.slice(fields)

  if block_given?
    scores = fields.collect{|field| tsv.sort_by(field, true, &block)}
  else
    scores = fields.collect{|field| tsv.sort_by(field, true){|gene,values| tsv.type == :single ? values.to_f : values.flatten.first.to_f}}
  end

  # One key => index table per ranking, so each position is a hash lookup
  # rather than a scan of the whole ranking for every key. The first
  # occurrence wins, which is what Array#index would report.
  lookups = scores.collect do |list|
    lookup = {}
    list.each_with_index{|entity, i| lookup[entity] = i unless lookup.key?(entity)}
    lookup
  end

  positions = {}

  if reverse
    size = self.size
    tsv.keys.each do |entity|
      positions[entity] = lookups.collect{|lookup| size - lookup[entity]}
    end
  else
    tsv.keys.each do |entity|
      positions[entity] = lookups.collect{|lookup| lookup[entity] + 1}
    end
  end

  # Number of keys with a usable (non-nil, non-empty) value in each field.
  signature_sizes = fields.collect{|field| slice(field).values.select{|v| v and not (v.respond_to?(:empty?) and v.empty?)}.length}

  RankProduct.score(positions, signature_sizes)
end

#ranks_for(field) ⇒ Object



528
529
530
531
532
533
534
535
536
537
538
539
# File 'lib/rbbt/statistics/random_walk.rb', line 528

# Builds a TSV holding the position of every key when sorted by +field+.
#
# field - field handed to sort_by to order the keys.
#
# Returns a TSV set up as :single with a "Rank" field cast through :to_i. It
# maps each key to its zero-based position in the sort_by order and carries
# this TSV's entity_options, entity_templates and namespace.
def ranks_for(field)
  ranks = TSV.setup({}, :key_field => self.key_field, :fields => ["Rank"], :type => :single, :cast => :to_i)

  sort_by(field, true).each_with_index{|key, position| ranks[key] = position}

  # Copy the entity metadata over, one attribute at a time.
  [:entity_options, :entity_templates, :namespace].each do |attribute|
    ranks.public_send("#{attribute}=", __send__(attribute))
  end

  ranks
end