Module: Misc

Defined in:
lib/rbbt/util/tar.rb,
lib/rbbt/util/misc.rb,
lib/rbbt/util/misc/lock.rb,
lib/rbbt/util/misc/math.rb,
lib/rbbt/util/misc/omics.rb,
lib/rbbt/util/misc/pipes.rb,
lib/rbbt/util/misc/format.rb,
lib/rbbt/util/misc/system.rb,
lib/rbbt/util/misc/inspect.rb,
lib/rbbt/util/misc/objects.rb,
lib/rbbt/util/misc/options.rb,
lib/rbbt/util/misc/development.rb,
lib/rbbt/util/misc/manipulation.rb,
lib/rbbt/util/misc/communication.rb

Overview

SOURCE: gist.github.com/sinisterchipmunk/1335041 Adapted for Rbbt

Constant Summary collapse

# Module-level Mutex; presumably guards lock bookkeeping — TODO confirm in lock.rb
LOCK_MUTEX =
Mutex.new
# Serializer used for the lock repository: Ruby's built-in Marshal
LOCK_REPO_SERIALIZER =
Marshal
# Multiply a natural logarithm by this factor to obtain a base-2 logarithm
Log2Multiplier =
1.0 / Math.log(2.0)
# Multiply a natural logarithm by this factor to obtain a base-10 logarithm
Log10Multiplier =
1.0 / Math.log(10.0)
# IUPAC nucleotide code => list of the concrete bases that code can stand for.
# The ambiguity entries are written as "X or Y" and split into arrays.
IUPAC2BASE =
{
  "A" => ["A"],
  "C" => ["C"],
  "G" => ["G"],
  "T" => ["T"],
  "U" => ["U"],
  "R" => "A or G".split(" or "),
  "Y" => "C or T".split(" or "),
  "S" => "G or C".split(" or "),
  "W" => "A or T".split(" or "),
  "K" => "G or T".split(" or "),
  "M" => "A or C".split(" or "),
  "B" => "C or G or T".split(" or "),
  "D" => "A or G or T".split(" or "),
  "H" => "A or C or T".split(" or "),
  "V" => "A or C or G".split(" or "),
  "N" => %w(A C T G),
}
# Base => complementary base. Both "T" and "U" map to "A"; "A" maps to "T".
BASE2COMPLEMENT =
{
  "A" => "T",
  "C" => "G",
  "G" => "C",
  "T" => "A",
  "U" => "A",
}
# Lower-case three-letter amino acid code => one-letter code (20 entries).
THREE_TO_ONE_AA_CODE =
{
  "ala" =>   "A",
  "arg" =>   "R",
  "asn" =>   "N",
  "asp" =>   "D",
  "cys" =>   "C",
  "glu" =>   "E",
  "gln" =>   "Q",
  "gly" =>   "G",
  "his" =>   "H",
  "ile" =>   "I",
  "leu" =>   "L",
  "lys" =>   "K",
  "met" =>   "M",
  "phe" =>   "F",
  "pro" =>   "P",
  "ser" =>   "S",
  "thr" =>   "T",
  "trp" =>   "W",
  "tyr" =>   "Y",
  "val" =>   "V"
}
# DNA codon (upper case, using T rather than U) => one-letter amino acid code.
# Covers all 64 codons; the three stop codons map to "*".
CODON_TABLE =
{
  "ATT" => "I",
  "ATC" => "I",
  "ATA" => "I",
  "CTT" => "L",
  "CTC" => "L",
  "CTA" => "L",
  "CTG" => "L",
  "TTA" => "L",
  "TTG" => "L",
  "GTT" => "V",
  "GTC" => "V",
  "GTA" => "V",
  "GTG" => "V",
  "TTT" => "F",
  "TTC" => "F",
  "ATG" => "M",
  "TGT" => "C",
  "TGC" => "C",
  "GCT" => "A",
  "GCC" => "A",
  "GCA" => "A",
  "GCG" => "A",
  "GGT" => "G",
  "GGC" => "G",
  "GGA" => "G",
  "GGG" => "G",
  "CCT" => "P",
  "CCC" => "P",
  "CCA" => "P",
  "CCG" => "P",
  "ACT" => "T",
  "ACC" => "T",
  "ACA" => "T",
  "ACG" => "T",
  "TCT" => "S",
  "TCC" => "S",
  "TCA" => "S",
  "TCG" => "S",
  "AGT" => "S",
  "AGC" => "S",
  "TAT" => "Y",
  "TAC" => "Y",
  "TGG" => "W",
  "CAA" => "Q",
  "CAG" => "Q",
  "AAT" => "N",
  "AAC" => "N",
  "CAT" => "H",
  "CAC" => "H",
  "GAA" => "E",
  "GAG" => "E",
  "GAT" => "D",
  "GAC" => "D",
  "AAA" => "K",
  "AAG" => "K",
  "CGT" => "R",
  "CGC" => "R",
  "CGA" => "R",
  "CGG" => "R",
  "AGA" => "R",
  "AGG" => "R",
  "TAA" => "*",
  "TAG" => "*",
  "TGA" => "*",
}
# Number of bytes requested per readpartial call in consume_stream (8 KiB)
BLOCK_SIZE =
1024 * 8
# Marker value: a stream line whose only field is this tag is pasted as empty
# fields by _paste_streams
SKIP_TAG =
"[SKIP TAG]"
# Module-level Mutex; presumably serializes pipe creation — TODO confirm in pipes.rb
PIPE_MUTEX =
Mutex.new
# Starts empty; presumably tracks the write ends of open pipes — TODO confirm in pipes.rb
OPEN_PIPE_IN =
[]
# Palette of 12 hex colors handed out in order by colors_for
COLOR_LIST =
%w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
# Size limits below are presumably used when digesting/inspecting large
# objects — NOTE(review): their consumers are not visible in this excerpt
ARRAY_MAX_LENGTH =
1000
STRING_MAX_LENGTH =
ARRAY_MAX_LENGTH * 100
TSV_MAX_FIELDS =
100
TSV_MAX_ROWS =
100
HASH2MD5_MAX_STRING_LENGTH =
1000
HASH2MD5_MAX_ARRAY_LENGTH =
100
# Module-level Mutex; by its name, used to run code exclusively across threads
# — TODO confirm at the call site
MUTEX_FOR_THREAD_EXCLUSIVE =
Mutex.new
# Pushbullet API key, resolved once at load time:
#   1. the PUSHBULLET_KEY environment variable, when set;
#   2. otherwise the stripped contents of $HOME/config/apps/pushbullet/apikey,
#      when HOME is set and that file exists;
#   3. otherwise nil.
# HOME is checked explicitly because File.join(nil, ...) raises TypeError,
# which would abort loading in environments that do not define HOME.
PUSHBULLET_KEY =
begin
  if ENV["PUSHBULLET_KEY"]
    ENV["PUSHBULLET_KEY"]
  elsif ENV['HOME']
    config_api = File.join(ENV['HOME'], 'config/apps/pushbullet/apikey')
    File.read(config_api).strip if File.exist? config_api
  end
end

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.sensiblewrite_dirObject

Returns the value of attribute sensiblewrite_dir.



13
14
15
# File 'lib/rbbt/util/misc/pipes.rb', line 13

# Reader for the @sensiblewrite_dir instance variable.
def sensiblewrite_dir
  @sensiblewrite_dir
end

.sensiblewrite_lock_dirObject

Returns the value of attribute sensiblewrite_lock_dir.



5
6
7
# File 'lib/rbbt/util/misc/pipes.rb', line 5

# Reader for the @sensiblewrite_lock_dir instance variable.
def sensiblewrite_lock_dir
  @sensiblewrite_lock_dir
end

Class Method Details

._convert_match_condition(condition) ⇒ Object



30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc.rb', line 30

# Translates a textual match condition into the value used for matching:
#
# * 'true' / 'false'               -> the corresponding boolean
# * text starting with "/"          -> the result of condition.to_regexp
# * "<", "<=", ">" or ">=" + rest  -> [:cmp, operator, rest.to_f]
# * "!" + rest                     -> [:invert, <conversion of the stripped rest>]
# * anything else                  -> the condition itself, unchanged
def self._convert_match_condition(condition)
  if condition == 'true'
    true
  elsif condition == 'false'
    false
  elsif condition[0] == "/"
    condition.to_regexp
  elsif condition =~ /^([<>]=?)(.*)/
    [:cmp, Regexp.last_match(1), Regexp.last_match(2).to_f]
  elsif condition[0] == "!"
    [:invert, _convert_match_condition(condition[1..-1].strip)]
  else
    condition
  end
end

._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block) ⇒ Object



553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
# File 'lib/rbbt/util/misc/pipes.rb', line 553

# Pastes several sep-delimited streams side by side into +output+, one output
# line per key.
#
# On each round the smallest pending key is chosen (using +block+ as the sort
# comparator when given). Every stream currently positioned on that key
# contributes its fields and is advanced one line; the other streams
# contribute empty fields. NOTE(review): this only lines keys up correctly if
# every stream is already sorted consistently with that ordering — confirm
# with callers.
#
# streams - IO-like objects responding to gets/close/closed?; Step objects
#           (when the Step class is defined) are replaced by a buffered copy
#           of their stream or, failing that, by their opened result path.
# output  - object receiving the merged lines through puts.
# lines   - optional already-read first line of each stream.
# sep     - field separator.
# header  - optional line written to output before anything else.
#
# On a StandardError the exception is logged, every stream that responds to
# abort is aborted, and the exception is re-raised.
def self._paste_streams(streams, output, lines = nil, sep = "\t", header = nil, &block)
  output.puts header if header
  streams = streams.collect do |stream|
    if defined? Step and Step === stream
      io = stream.get_stream 
      if io
        buffer_stream(io)
      else
        stream.join.path.open
      end
    else
      stream
    end
  end

  begin

    # Prime the merge with the first line of each stream
    lines ||= streams.collect{|s| s.gets }
    keys = []
    parts = []
    lines.each_with_index do |line,i|
      if line.nil?
        keys[i] = nil
        parts[i] = []
      else
        key, *p = line.chomp.split(sep, -1) 
        keys[i] = key
        parts[i] = p
      end
    end
    # Field count of each stream's first line; used to size the empty padding
    sizes = parts.collect{|p| p.nil? ? 0 : p.length }
    last_min = nil

    while lines.compact.any?
      if block_given?
        min = keys.compact.sort(&block).first
      else
        min = keys.compact.sort.first
      end
      str = []
      threads = []
      keys.each_with_index do |key,i|
        case key
        when min
          # A line consisting only of SKIP_TAG is emitted as empty fields
          if parts[i] == [SKIP_TAG]
            str << [sep * (sizes[i]-1)] if sizes[i] > 0
          else
            str << [parts[i] * sep]
          end

          # This stream was consumed for the current key: advance it
          line = lines[i] = streams[i].gets

          if line.nil? 
            keys[i] = nil
            parts[i] = nil
            streams[i].close unless streams[i].closed?
            streams[i].join if streams[i].respond_to?(:join) 
          else
            k, *p = line.chomp.split(sep, -1)
            keys[i] = k
            parts[i] = p
          end
        else
          # Stream has no entry for this key: pad with empty fields
          str << [sep * (sizes[i]-1)] if sizes[i] > 0
        end
      end

      output.puts [min, str.flatten*sep] * sep
    end

    streams.each do |stream|
      stream.close unless stream.closed?
      stream.join if stream.respond_to?(:join) 
    end
  rescue 
    Log.exception $!
    streams.each do |stream|
      stream.abort if stream.respond_to? :abort
    end
    raise $!
  end
end

._untar_cmd(io, destination) ⇒ Object



86
87
88
89
90
# File 'lib/rbbt/util/tar.rb', line 86

# Extracts the tar archive read from +io+ into the +destination+ directory by
# piping it to the tar command, creating the directory first when it does not
# exist. Always returns nil.
def self._untar_cmd(io, destination)
  FileUtils.mkdir_p(destination) unless File.exist?(destination)
  command = "tar xvf - -C '#{destination}'"
  CMD.cmd_log(command, :in => io)
  nil
end

._zip_fields(array, max = nil) ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/util/misc/objects.rb', line 67

# Zips a list of field-value lists into a list of per-position tuples.
#
# Any list holding a single value is repeated +max+ times before zipping, so
# one value lines up with every position of the longer fields.
#
# array - list of lists (one list per field).
# max   - target length; defaults to the length of the longest list.
#
# Returns [] when array is nil, empty, or its first list is nil.
def self._zip_fields(array, max = nil)
  return [] if array.nil? or array.empty?
  first = array.first
  return [] if first.nil?

  max = array.collect{|l| l.length}.max if max.nil?

  # `&&` is required here: `==`/`>` bind looser than `&`, so the previous
  # `v.length == 1 & max > 1` was always false and never expanded anything.
  rest = array[1..-1].collect{|v|
    v.length == 1 && max > 1 ? v * max : v
  }
  first = first * max if first.length == 1 and max > 1

  first.zip(*rest)
end

.add_defaults(options, defaults = {}) ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/rbbt/util/misc/options.rb', line 120

# Fills in missing entries of +options+ from +defaults+ and returns the result.
#
# options  - a Hash (updated in place and returned), a String understood by
#            string2hash (a new Hash is returned, since a String cannot hold
#            the result), or nil (treated as an empty Hash).
# defaults - Hash of key => value pairs applied only for keys not already
#            present.
#
# Raises RuntimeError for any other kind of +options+.
def self.add_defaults(options, defaults = {})
  options ||= {}
  case options
  when Hash
    new_options = options.dup
    defaults.each do |key, value|
      # Presence is checked on the original object so any custom key lookup
      # it implements keeps being honoured
      next if options.include? key

      new_options[key] = value
    end
    options.replace new_options
  when String
    # Previously this path called String#replace with a Hash (TypeError) and
    # String#include? with the default keys; work on the parsed Hash instead.
    new_options = string2hash options
    defaults.each do |key, value|
      next if new_options.include? key

      new_options[key] = value
    end
    new_options
  else
    raise "Format of '#{options.inspect}' not understood. It should be a hash"
  end
end

.add_GET_param(url, param, value) ⇒ Object



78
79
80
81
82
83
84
# File 'lib/rbbt/util/misc/options.rb', line 78

# Returns +url+ with the GET parameter +param+ set to +value+.
#
# The query string (the text after the first "?") is parsed with
# GET_params2hash, the parameter is set on the resulting hash, and the hash is
# serialized back with hash2GET_params.
def self.add_GET_param(url, param, value)
  clean_url, param_str = url.split("?")
  hash = if param_str.nil?
           {}
         else
           self.GET_params2hash(param_str)
         end
  IndiferentHash.setup hash
  hash[param] = value
  query = hash2GET_params(hash)
  clean_url << "?" << query
end

.add_libdir(dir = nil) ⇒ Object



3
4
5
6
# File 'lib/rbbt/util/misc/development.rb', line 3

# Puts +dir+ at the front of $LOAD_PATH unless it is already listed.
# When no directory is given, the 'lib' directory under
# Path.caller_lib_dir(caller.first) is used.
#
# Returns $LOAD_PATH when the directory was added, nil otherwise.
def self.add_libdir(dir=nil)
  dir = File.join(Path.caller_lib_dir(caller.first), 'lib') unless dir
  return nil if $LOAD_PATH.include?(dir)

  $LOAD_PATH.unshift(dir)
end

.add_stream_filename(io, filename) ⇒ Object



718
719
720
721
722
723
724
725
# File 'lib/rbbt/util/misc/pipes.rb', line 718

# Gives +io+ a filename accessor (defined on that object only) and sets it to
# +filename+. Objects that already respond to :filename are left untouched.
#
# Returns +filename+ when it was assigned, nil otherwise.
def self.add_stream_filename(io, filename)
  return nil if io.respond_to? :filename

  io.singleton_class.send(:attr_accessor, :filename)
  io.filename = filename
end

.append_zipped(current, new) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/rbbt/util/misc/objects.rb', line 51

# Appends the values in +new+ position by position to the lists in +current+.
#
# current - list of lists; modified in place and returned. When it is empty it
#           is seeded with one single-element list per entry of +new+.
# new     - list with one entry per position. An Array entry has its elements
#           appended; any other entry is appended as a single value.
#           Note: entries are consumed from +new+ with shift, so the caller's
#           array is emptied as a side effect (original behavior, preserved).
def self.append_zipped(current, new)
  if current.empty?
    current.replace new.collect{|e| [e]}
  else
    current.each do |v|
      n = new.shift
      if Array === n
        # Append the values for this position; the previous code concatenated
        # the rest of +new+ (the other positions' entries) instead.
        v.concat n
      else
        v << n
      end
    end
  end
  current
end

.array2hash(array, default = nil) ⇒ Object



24
25
26
27
28
29
30
31
# File 'lib/rbbt/util/misc/options.rb', line 24

# Builds a Hash from a list of [key, value] pairs.
#
# A nil value is replaced by a fresh copy (dup) of +default+ when a non-nil
# default is given, so entries never share the same default object.
def self.array2hash(array, default = nil)
  result = {}
  array.each do |key, value|
    use_default = value.nil? && !default.nil?
    result[key] = use_default ? default.dup : value
  end
  result
end

.benchmark(repeats = 1, message = nil) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/development.rb', line 32

# Runs the given block +repeats+ times under Benchmark.measure and prints a
# header line followed by the measurement to standard output.
#
# message - optional label used in the header line.
#
# Returns the value of the last block execution. If anything is raised while
# measuring, "Benchmark aborted" is printed and the exception is re-raised.
def self.benchmark(repeats = 1, message = nil)
  require 'benchmark'
  res = nil
  begin
    measure = Benchmark.measure { repeats.times { res = yield } }
    header = message ? "#{message }: #{ repeats } repeats" : "Benchmark for #{ repeats } repeats"
    puts header
    puts measure
  rescue Exception
    puts "Benchmark aborted"
    raise
  end
  res
end

.binary_include?(array, elem) ⇒ Boolean

Returns:

  • (Boolean)


111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/rbbt/util/misc/manipulation.rb', line 111

# Binary search membership test.
#
# array - elements indexable by position; assumed to be sorted in ascending
#         order according to <=> (the search halves the range on each step).
# elem  - value to look for.
#
# Returns true when an element comparing equal (<=> returns 0) is found and
# false otherwise, including for an empty array (which previously returned
# the truthy value -1).
# Raises RuntimeError when elem cannot be compared with an array element.
def self.binary_include?(array, elem)
  upper = array.size - 1
  lower = 0

  while(upper >= lower) do
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0
      return true
    when -1
      upper = idx - 1
    when 1
      lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  return false
end

.bootstrap(elems, num = nil, options = {}, &block) ⇒ Object



310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/rbbt/util/misc/development.rb', line 310

# Runs the block once per element of +elems+ through TSV.traverse, using a
# number of cpus derived from +num+.
#
# elems   - indexable collection (responds to length and []).
# num     - :current or nil (derive from Etc.nprocessors, or from Rbbt::Config
#           when that is defined), a String (converted with to_i), or an
#           Integer (values >= 100 are turned into 32000 / num).
# options - options forwarded to TSV.traverse; :respawn, :cpus and :bar get
#           defaults here. The block result is discarded unless :into is set.
#
# Raises RuntimeError when +num+ is of any other kind.
def self.bootstrap(elems, num = nil, options = {}, &block)
  IndiferentHash.setup options

  num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil if defined?(Rbbt::Config)
  num = :current if num.nil?
  cpus = case num
         when :current
           n = Etc.nprocessors
           # Never use more workers than half the number of elements
           n = elems.length / 2 if n > elems.length/2

           # Fall back to a single cpu when a bootstrap already in progress
           # plus this one would exceed the processor count
           if $BOOTSTRAPPED_CURRENT && $BOOTSTRAPPED_CURRENT + n > Etc.nprocessors
             1
           else
             n
           end
         when String
           num.to_i
         when Integer
           if num < 100
             num
           else
             32000 / num
           end
         else
           raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
         end


  options = Misc.add_defaults options, :respawn => true, :cpus => cpus
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:cpus] } cpus: #{ Misc.fingerprint Annotated.purge(elems) }"
  # NOTE(review): `and` binds looser than `=`, so respawn receives only
  # options[:respawn]; the cpus checks on this line do not affect its value.
  respawn = options[:respawn] and options[:cpus] and options[:cpus].to_i > 1

  # Elements are traversed by position, passed around as strings
  index = (0..elems.length-1).to_a.collect{|v| v.to_s }

  TSV.traverse index, options do |pos|
    if num == :current
      $BOOTSTRAPPED_CURRENT ||= n 
      # NOTE(review): adds zero, so this line leaves the counter unchanged
      $BOOTSTRAPPED_CURRENT += 0 
    end
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    res = begin
            yield elem
          rescue Interrupt
            Log.warn "Process #{Process.pid} was aborted"
            raise $!
          end
    res = nil unless options[:into]
    raise RbbtProcessQueue::RbbtProcessQueueWorker::Respawn, res if respawn == :always and cpus > 1
    res
  end
end

.bootstrap_in_threads(elems, num = :current, options = {}, &block) ⇒ Object



363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
# File 'lib/rbbt/util/misc/development.rb', line 363

# Runs the block once per element of +elems+ through TSV.traverse, passing a
# :threads option derived from +num+.
#
# elems   - indexable collection (responds to length and []).
# num     - :current or nil (10 threads), a String (converted with to_i), or
#           an Integer (values >= 100 are turned into 32000 / num).
# options - options forwarded to TSV.traverse; :respawn, :threads and :bar get
#           defaults here. The block result is discarded unless :into is set.
#
# Raises RuntimeError when +num+ is of any other kind.
def self.bootstrap_in_threads(elems, num = :current, options = {}, &block)
  IndiferentHash.setup options
  num = :current if num.nil?
  threads = case num
            when :current then 10
            when String   then num.to_i
            when Integer  then num < 100 ? num : 32000 / num
            else
              raise "Parameter 'num' not understood: #{Misc.fingerprint num}"
            end

  options = Misc.add_defaults options, :respawn => true, :threads => threads
  options = Misc.add_defaults options, :bar => "Bootstrap in #{ options[:threads] } threads: #{ Misc.fingerprint Annotated.purge(elems) }"

  # Elements are traversed by position, passed around as strings
  positions = (0..elems.length-1).map{|v| v.to_s }
  TSV.traverse positions, options do |pos|
    elem = elems[pos.to_i]
    elems.annotate elem if elems.respond_to? :annotate
    res = begin
            yield elem
          rescue Interrupt
            Log.warn "Process #{Process.pid} was aborted"
            raise $!
          end
    options[:into] ? res : nil
  end
end

.break_lines(text, char_size = 80) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/util/misc/inspect.rb', line 7

# Re-wraps +text+ so that lines hold at most +char_size+ characters where
# possible.
#
# Newlines are first turned into spaces; the text is then split at runs of
# whitespace/hyphens (the separators are kept) and the pieces are packed
# greedily into lines. Each resulting line is stripped and the lines are
# joined with "\n". A single piece longer than +char_size+ is not broken.
def self.break_lines(text, char_size=80)
  flat = text.gsub("\n", " ")
  lines = []
  current = ""
  flat.split(/([\s\-]+)/).each do |part|
    if current.length + part.length > char_size
      lines << current
      current = ""
    end
    current += part
  end
  lines << current

  lines.map { |l| l.strip }.join("\n")
end

.buffer_stream(stream) ⇒ Object



547
548
549
550
551
# File 'lib/rbbt/util/misc/pipes.rb', line 547

# Returns the read end of a new pipe that +stream+ is copied into.
# The copy is started with Misc.consume_stream in threaded mode (second
# argument true), writing into the pipe's write end.
def self.buffer_stream(stream)
  sout, sin = Misc.pipe
  Misc.consume_stream(stream, true, sin)
  sout
end

.camel_case(string) ⇒ Object



85
86
87
88
89
90
# File 'lib/rbbt/util/misc/format.rb', line 85

# Converts an underscore-separated name into CamelCase.
#
# A string without underscores that already contains an upper-case letter is
# returned as is. Otherwise the string is split at underscores and around
# digit runs; each piece is capitalized, except pieces made only of two or
# more upper-case letters, which are kept verbatim.
def self.camel_case(string)
  already_camel = string !~ /_/ && string =~ /[A-Z]+.*/
  return string if already_camel

  pieces = string.split(/_|(\d+)/)
  converted = pieces.map do |piece|
    piece =~ /^[A-Z]{2,}$/ ? piece : piece.capitalize
  end
  converted.join
end

.camel_case_lower(string) ⇒ Object



92
93
94
95
96
# File 'lib/rbbt/util/misc/format.rb', line 92

# Converts an underscore-separated name into lowerCamelCase: the first piece
# is downcased; each following piece is capitalized, except pieces made only
# of two or more upper-case letters, which are kept verbatim.
def self.camel_case_lower(string)
  pieces = string.split('_').each_with_index.map do |piece, position|
    if position.zero?
      piece.downcase
    elsif piece =~ /^[A-Z]{2,}$/
      piece
    else
      piece.capitalize
    end
  end
  pieces.join
end

.choose(array, select) ⇒ Object



97
98
99
# File 'lib/rbbt/util/misc/objects.rb', line 97

# Returns the elements of +array+ whose counterpart at the same position in
# +select+ is truthy.
def self.choose(array, select)
  array.zip(select).each_with_object([]) do |(elem, keep), chosen|
    chosen << elem if keep
  end
end

.chr_cmp_contigs(chr1, chr2, contigs) ⇒ Object



377
378
379
# File 'lib/rbbt/util/misc/omics.rb', line 377

# Compares two chromosome names by their position in the +contigs+ list.
# Returns the <=> of the two positions (nil when they cannot be compared,
# e.g. when only one name is missing from the list).
def self.chr_cmp_contigs(chr1, chr2, contigs)
  position1 = contigs.index(chr1)
  position2 = contigs.index(chr2)
  position1 <=> position2
end

.chr_cmp_strict(chr1, chr2) ⇒ Object



363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/rbbt/util/misc/omics.rb', line 363

# Compares two chromosome names after removing a leading "chr".
#
# Names ending in digits are compared by that trailing number; a name ending
# in digits sorts before one that does not; otherwise the names are compared
# as strings.
def self.chr_cmp_strict(chr1, chr2)
  name1 = chr1.sub(/^chr/, '')
  name2 = chr2.sub(/^chr/, '')
  number1 = name1[/(\d+)$/, 1]
  number2 = name2[/(\d+)$/, 1]

  return number1.to_i <=> number2.to_i if number1 && number2
  return -1 if number1
  return 1 if number2

  name1 <=> name2
end

.chunk(array, num) ⇒ Object



196
197
198
199
200
201
202
203
204
# File 'lib/rbbt/util/misc/development.rb', line 196

# Yields consecutive slices of +array+ holding up to +num+ elements each
# (the last slice may be shorter).
#
# array - object responding to length and to [] with a Range.
# num   - slice size; must be positive.
#
# Returns nil.
# Raises ArgumentError when +num+ is not positive: with such a size the
# cursor would never advance and the loop would not terminate.
def self.chunk(array, num)
  raise ArgumentError, "chunk size must be positive, got #{num.inspect}" unless num > 0

  total = array.length
  current = 0
  while current < total
    last = current + num - 1
    yield array[current..last]
    current = last + 1
  end
end

.collapse_ranges(ranges) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/manipulation.rb', line 2

# Merges overlapping ranges.
#
# The ranges are visited in order of their begin value. A range that starts
# at or before the end of the previous merged range is folded into it;
# otherwise it opens a new merged range. Ranges that merely touch without
# sharing a value (e.g. 1..2 and 3..4) are kept apart.
#
# Returns a list of inclusive ranges (begin..end), ordered by begin.
def self.collapse_ranges(ranges)
  merged = []
  furthest_end = nil

  ranges.sort_by{|range| range.begin }.each do |range|
    rbegin = range.begin
    rend = range.end

    if furthest_end.nil? or rbegin > furthest_end
      merged << [rbegin, rend]
      furthest_end = rend
    else
      # The overlap can only be with the most recent merged range, since the
      # earlier ones end before that range begins
      tail = merged.last
      tail[1] = [rend, tail[1]].max
      furthest_end = rend if rend > furthest_end
    end
  end

  merged.collect{|b,e| (b..e)}
end

.collapse_stream(s, line = nil, sep = "\t", header = nil, &block) ⇒ Object



494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
# File 'lib/rbbt/util/misc/pipes.rb', line 494

# Collapses consecutive lines of a sep-delimited stream that share the same
# key (first field) into one line, joining the values of each field position
# with "|". The collapsed lines are written into the pipe provided by
# Misc.open_pipe; the return value is whatever Misc.open_pipe returns.
#
# s      - input handed to process_stream, which yields the stream to read.
# line   - optional first line, if it was already read from the stream.
# sep    - field separator (nil falls back to "\t").
# header - optional line written before anything else.
# block  - optional; called with the collapsed field values of each key, and
#          its result is written after the key instead of the raw values.
#
# NOTE(review): only *adjacent* lines with equal keys are merged, so the
# input presumably has to be sorted/grouped by key — confirm with callers.
def self.collapse_stream(s, line = nil, sep = "\t", header = nil, &block)
  sep ||= "\t"
  Misc.open_pipe do |sin|
    sin.puts header if header
    process_stream(s) do |s|
      line ||= s.gets

      current_parts = []
      while line 
        key, *parts = line.chomp.split(sep, -1)
        case
        when key.nil?
          # Empty line: nothing to collapse
        when current_parts.nil?
          # NOTE(review): current_parts starts as [] and is never set to nil
          # in this method, so this branch looks unreachable
          current_parts = parts
          current_key = key
        when current_key == key
          # Same key as the previous line: append each value after a "|"
          parts.each_with_index do |part,i|
            if current_parts[i].nil?
              current_parts[i] = "|" << part
            else
              current_parts[i] = current_parts[i] << "|" << part
            end
          end

          # Positions missing from this line still get an (empty) entry
          (parts.length..current_parts.length-1).to_a.each do |pos|
            current_parts[pos] = current_parts[pos] << "|" << ""
          end
        when current_key.nil?
          # First keyed line
          current_key = key
          current_parts = parts
        when current_key != key
          # Key changed: emit the finished group and start a new one
          if block_given?
            res = block.call(current_parts)
            sin.puts [current_key, res] * sep
          else
            sin.puts [current_key, current_parts].flatten * sep
          end 
          current_key = key
          current_parts = parts
        end
        line = s.gets
      end

      # Emit the last group, if any keyed line was seen
      if block_given?
        res = block.call(current_parts)
        sin.puts [current_key, res] * sep
      else
        sin.puts [current_key, current_parts].flatten * sep
      end unless current_key.nil?
    end
  end
end

.colors_for(list) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/format.rb', line 4

# Assigns a color from COLOR_LIST to each distinct element of +list+, in
# order of first appearance; repeated elements reuse their color. Once the
# palette is exhausted, further distinct elements get nil.
#
# Returns [colors, used]: the color of each list entry (same order as the
# list) and the element => color Hash.
def self.colors_for(list)
  palette = COLOR_LIST.dup
  assigned = {}

  colors = list.collect do |elem|
    assigned[elem] = palette.shift unless assigned.include? elem
    assigned[elem]
  end

  [colors, assigned]
end

.common_path(dir, file) ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/rbbt/util/misc/system.rb', line 56

# Tells whether +file+ is +dir+ itself or lies somewhere below it.
# Both paths are expanded first; the check then walks up the parents of
# +file+ until +dir+ is met (true) or the filesystem root is reached (false).
def self.common_path(dir, file)
  current = File.expand_path file
  target = File.expand_path dir

  loop do
    return true if current == target

    parent = File.dirname(current)
    return false if parent == current

    current = parent
  end
end

.compare_lines(stream1, stream2, args, sort = false) ⇒ Object



681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
# File 'lib/rbbt/util/misc/pipes.rb', line 681

# Compares the lines of two inputs with the `comm` command (run under
# LC_ALL=C) and returns the result of CMD.cmd in pipe mode.
#
# stream1, stream2 - a Path, the name of an existing file, or anything
#                    TSV.get_stream accepts; the latter is first dumped to a
#                    temporary file that is removed by the command's :post hook.
# args             - extra arguments placed verbatim after `comm`.
# sort             - when true, both inputs are passed through
#                    Misc.sort_stream before comparing.
def self.compare_lines(stream1, stream2, args, sort = false)
  if sort
    return compare_lines(Misc.sort_stream(stream1), Misc.sort_stream(stream2), args, false)
  end

  erase = []

  # Returns a file name for the input, dumping it to a temp file if needed
  as_file = lambda do |stream|
    if Path === stream or (String === stream and File.exist? stream)
      stream
    else
      tmp = TmpFile.tmp_file
      erase << tmp
      Misc.consume_stream(TSV.get_stream(stream), false, tmp)
      tmp
    end
  end

  file1 = as_file.call(stream1)
  file2 = as_file.call(stream2)

  cleanup = Proc.new{ erase.each{|f| FileUtils.rm f } }
  CMD.cmd("env LC_ALL=C comm #{args} '#{file1}' '#{file2}'", :pipe => true, :post => cleanup)
end

.consolidate(list) ⇒ Object



40
41
42
43
44
45
46
47
48
49
# File 'lib/rbbt/util/misc/objects.rb', line 40

# Concatenates all the lists in +list+ into the first one.
# The first (non-nil) entry is extended in place with concat and returned;
# nil is returned for an empty list.
def self.consolidate(list)
  result = nil
  list.each do |entry|
    if result.nil?
      result = entry
    else
      result.concat entry
    end
  end
  result
end

.consume_stream(io, in_thread = false, into = nil, into_close = true, &block) ⇒ Object



290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/rbbt/util/misc/pipes.rb', line 290

# Reads +io+ until EOF in pieces of up to BLOCK_SIZE bytes, optionally
# copying everything into +into+.
#
# io         - source stream. Nothing is done for a Path or for objects that
#              do not respond to read; an already closed stream is only
#              joined (when it responds to join).
# in_thread  - when true the work is done in a new Thread, which is returned;
#              exceptions raised there are re-raised in the calling thread.
#              NOTE(review): the nested call does not forward +block+, so the
#              block appears to be ignored in threaded mode — confirm intent.
# into       - optional destination: anything accepting <<, or a file name /
#              Path that is opened for writing (its directory is created
#              when missing).
# into_close - close (and join, when supported) +into+ once done.
# block      - called with no arguments after the streams are closed.
#
# On Aborted the source is aborted and a destination file created here is
# removed; the exception is not re-raised. Any other exception does the same
# cleanup and is re-raised.
def self.consume_stream(io, in_thread = false, into = nil, into_close = true, &block)
  return if Path === io
  return unless io.respond_to? :read 

  if io.respond_to? :closed? and io.closed?
    io.join if io.respond_to? :join
    return
  end

  if in_thread
    Thread.new(Thread.current) do |parent|
      begin
        consume_stream(io, false, into, into_close)
      rescue Exception
        parent.raise $!
      end
    end
  else
    if into
      Log.medium "Consuming stream #{Misc.fingerprint io} -> #{Misc.fingerprint into}"
    else
      Log.medium "Consuming stream #{Misc.fingerprint io}"
    end

    begin
      into = into.find if Path === into
      if String === into 
        dir = File.dirname(into)
        Open.mkdir dir unless Open.exists?(dir)
        # Remember the file name so it can be removed if consuming fails
        into_path, into = into, Open.open(into, :mode => 'w') 
      end
      into.sync = true if IO === into
      into_close = false unless into.respond_to? :close
      io.sync = true

      begin
        # readpartial raises EOFError at end of stream, which ends the loop
        while c = io.readpartial(BLOCK_SIZE)
          into << c if into
        end
      rescue EOFError
      end

      io.join if io.respond_to? :join
      io.close unless io.closed?
      into.close if into and into_close and not into.closed?
      into.join if into and into_close and into.respond_to?(:joined?) and not into.joined?
      block.call if block_given?

      #Log.medium "Done consuming stream #{Misc.fingerprint io}"
    rescue Aborted
      Log.medium "Consume stream aborted #{Misc.fingerprint io}"
      io.abort if io.respond_to? :abort
      #io.close unless io.closed?
      FileUtils.rm into_path if into_path and File.exist? into_path
    rescue Exception
      Log.medium "Exception consuming stream: #{Misc.fingerprint io}: #{$!.message}"
      io.abort $! if io.respond_to? :abort
      FileUtils.rm into_path if into_path and File.exist? into_path
      raise $!
    end
  end
end

.correct_icgc_mutation(pos, ref, mut_str) ⇒ Object



119
120
121
122
123
124
# File 'lib/rbbt/util/misc/omics.rb', line 119

# Normalizes a mutation given as position, reference and mutated allele.
#
# * an allele of the form "-" followed by A, C, G or T becomes a run of "-"
#   one character shorter than the allele;
# * when the reference is "-", the allele is prefixed with "+".
#
# Returns [pos, [mutation]]; +pos+ is passed through untouched.
def self.correct_icgc_mutation(pos, ref, mut_str)
  mut = mut_str =~ /^-[ACGT]/ ? '-' * (mut_str.length - 1) : mut_str
  mut = "+" + mut if ref == '-'
  [pos, [mut]]
end

.correct_mutation(pos, ref, mut_str) ⇒ Object



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/rbbt/util/misc/omics.rb', line 126

# Normalizes a reference/alternative allele pair into this library's
# mutation notation.
#
# pos     - position of the first reference base.
# ref     - reference allele; "-" is treated as empty.
# mut_str - comma separated alternative alleles (nil means none); the
#           literal "<DEL>" is read as "-".
#
# While every alternative starts with the same base as the reference, that
# base is dropped from all of them and +pos+ is advanced by one. Each
# alternative is then encoded as:
#   "+" + bases        when nothing is left of the reference (insertion)
#   "-" * n            when it is shorter than the reference and is either
#                      empty or found inside it (n = length difference)
#   the base itself    when both are a single base
#   "-" * ref.length followed by the alternative otherwise; this case is
#   reported through Log.debug as non-standard
#
# Returns [pos, mutations].
def self.correct_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  ref = '' if ref == '-'
  # Strip the prefix shared by the reference and all the alternatives
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    raise "REF nil" if ref.nil?
    pos = pos + 1
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    m = '' if m == '-'
    case
    when ref.empty?
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      m
    else
      # NOTE(review): ref can no longer be '-' here (it was replaced by ''
      # above), so the first branch looks unreachable
      if ref == '-'
        res = '+' + m
      else
        res = '-' * ref.length
        res << m unless m == '-'
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end

  [pos, muts]
end

.correct_vcf_mutation(pos, ref, mut_str) ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/rbbt/util/misc/omics.rb', line 163

# Normalizes a VCF-style reference/alternative allele pair into this
# library's mutation notation.
#
# pos     - position of the first reference base.
# ref     - reference allele.
# mut_str - comma separated alternative alleles (nil means none); the
#           literal "<DEL>" is read as "-".
#
# While every alternative starts with the same base as the reference, that
# base is dropped from all of them; +pos+ is advanced by one on each step
# except the one that leaves the reference empty. Each alternative is then
# encoded as:
#   "+" + bases        when the reference is empty or "-" (insertion)
#   "-" * n            when it is shorter than the reference and is either
#                      empty or found inside it (n = length difference)
#   the base itself    when both are a single base
#   (dropped)          when it equals the reference
#   otherwise "+" + the alternative minus its first base if both start with
#   the same base, else "-" * ref.length followed by the alternative; these
#   cases are reported through Log.debug as non-standard
#
# Returns [pos, mutations].
def self.correct_vcf_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')
  muts.collect!{|m| m == '<DEL>' ? '-' : m }

  # Strip the prefix shared by the reference and all the alternatives
  while ref.length >= 1 and muts.reject{|m| m[0] == ref[0]}.empty?
    ref = ref[1..-1]
    raise "REF nil" if ref.nil?
    pos = pos + 1 unless ref.empty?
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    case
    when (ref.empty? or ref == '-')
      "+" << m
    when (m.length < ref.length and (m.empty? or ref.index(m)))
      "-" * (ref.length - m.length)
    when (ref.length == 1 and m.length == 1)
      m
    when (ref == m)
      nil
    else
      # NOTE(review): ref == '-' is already handled by the first `when`, so
      # this first branch looks unreachable
      if ref == '-'
        res = '+' + m
      else
        if ref[0] == m[0]
          res = '+' << m[1..-1]
        else
          res = '-' * ref.length
          res << m unless m == '-'
        end
      end
      Log.debug{"Non-standard annotation: #{[ref, m]} (#{ muts }) => #{ res }"}

      res
    end
  end.compact

  [pos, muts]
end

.counts(array) ⇒ Object



78
79
80
81
82
83
84
85
86
# File 'lib/rbbt/util/misc/math.rb', line 78

# Counts how many times each distinct element occurs in +array+.
# Returns a Hash of element => number of occurrences, keyed in order of
# first appearance.
def self.counts(array)
  occurrences = {}
  array.each do |elem|
    occurrences[elem] = occurrences.fetch(elem, 0) + 1
  end
  occurrences
end

.digest(text) ⇒ Object



169
170
171
# File 'lib/rbbt/util/misc/inspect.rb', line 169

# Returns the MD5 digest of +text+ as a hexadecimal string.
def self.digest(text)
  md5 = Digest::MD5.new
  md5 << text
  md5.hexdigest
end

.divide(array, num) ⇒ Object

Divides the array into num chunks by dealing the elements out round-robin, one per chunk in turn; chunk sizes therefore differ by at most one element.



208
209
210
211
212
213
214
215
216
217
# File 'lib/rbbt/util/misc/development.rb', line 208

# Deals the elements of +array+ into +num+ chunks round-robin: element i goes
# to chunk i % num, so chunk sizes differ by at most one.
#
# num - number of chunks; converted with to_i, and anything below 1 is
#       treated as 1. The conversion is done once up front so the same
#       integer is used both to create the chunks and to pick a chunk
#       (previously a String, fractional, negative or nil +num+ crashed).
#
# Returns the list of chunks (some may be empty when num exceeds the number
# of elements).
def self.divide(array, num)
  num = num.to_i
  num = 1 if num < 1

  chunks = Array.new(num) { [] }
  array.each_with_index do |e, i|
    chunks[i % num] << e
  end
  chunks
end

.do_once(&block) ⇒ Object



126
127
128
129
130
131
# File 'lib/rbbt/util/misc/development.rb', line 126

# Runs the given block only on the first call made in this process; later
# calls do nothing. The state is kept in the global $__did_once, so it is
# shared by every caller regardless of which block they pass.
#
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end
  nil
end

.dup_stream(stream) ⇒ Object



269
270
271
# File 'lib/rbbt/util/misc/pipes.rb', line 269

# Returns a single copy of +stream+: the first stream produced by
# dup_stream_multiple(stream, 1).
def self.dup_stream(stream)
  dup_stream_multiple(stream, 1).first
end

.dup_stream_multiple(stream, num = 1) ⇒ Object



252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/rbbt/util/misc/pipes.rb', line 252

# Produces +num+ additional streams carrying the content of +stream+.
#
# A dup of the stream is split with Misc.tee_stream into num + 1 streams; the
# original +stream+ object is reopened onto the first of them and the
# remaining +num+ are returned.
# NOTE(review): presumably this keeps +stream+ usable by its current holder
# while the copies are read elsewhere — confirm against Misc.tee_stream.
def self.dup_stream_multiple(stream, num = 1)
  stream_dup = stream.dup
  # Carry annotations over to the dup before clearing them on the original
  if stream.respond_to? :annotate
    stream.annotate stream_dup
    stream.clear
  end
  tee1, *rest = Misc.tee_stream stream_dup, num + 1
  stream.reopen(tee1)

  #ToDo: I can't explain why the @threads variable appears with the value of
  # @filename
  stream.instance_variable_set(:@threads, nil) if stream.instance_variables.include?(:@threads)

  tee1.annotate(stream)
  rest
end

.ensembl_server(organism) ⇒ Object



321
322
323
324
325
326
327
328
# File 'lib/rbbt/util/misc/omics.rb', line 321

# Returns the Ensembl host for an organism code.
# The part after the first "/" of the code, when present, selects the
# matching archive host; otherwise the main site is returned.
def self.ensembl_server(organism)
  date = organism.split("/")[1]
  return "www.ensembl.org" if date.nil?

  "#{ date }.archive.ensembl.org"
end

.env_add(var, value, sep = ":", prepend = true) ⇒ Object



19
20
21
22
23
24
25
26
27
# File 'lib/rbbt/util/misc/system.rb', line 19

# Adds +value+ to the separator-delimited list held in the environment
# variable +var+ (for example PATH), unless it is already one of its entries.
#
# var     - name of the environment variable; created when missing.
# value   - entry to add.
# sep     - separator between entries.
# prepend - add at the front (default) or at the end of the list.
#
# Returns the new content of the variable, or nil when +value+ was already
# listed and nothing changed.
def self.env_add(var, value, sep = ":", prepend = true)
  ENV[var] ||= ""
  current = ENV[var]

  # The separator is quoted too, so one such as "|" is matched literally
  sep_re = Regexp.quote sep
  return if current =~ /(#{sep_re}|^)#{Regexp.quote value}(#{sep_re}|$)/

  ENV[var] = if current.empty?
               # No dangling separator: in PATH-like variables an empty entry
               # stands for the current directory
               value
             elsif prepend
               value + sep + current
             else
               # Previously this appended the variable to itself, not +value+
               current + sep + value
             end
end

.field_position(fields, field, quiet = false) ⇒ Object

Raises:



101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/misc/objects.rb', line 101

# Finds the position of +field+ within +fields+.
#
# fields - list of field names (may be nil).
# field  - an Integer or Range (returned as is) or a field name. The name is
#          first looked up by equality and then, as a fallback, as a
#          case-insensitive pattern anchored at both ends. Note that the name
#          is interpolated into that pattern unescaped, so regular expression
#          characters in it are interpreted.
# quiet  - return nil instead of raising when the field cannot be resolved.
#
# Returns the position, or nil in quiet mode when nothing matches or +fields+
# is nil (the latter previously failed with NoMethodError).
# Raises FieldNotFoundError (unless quiet) when +fields+ is nil or the field
# is not found.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field or Range === field

  if fields.nil?
    raise FieldNotFoundError, "Field information missing" unless quiet
    return nil
  end

  fields.each_with_index{|f,i| return i if f == field}
  field_re = /^#{field}$/i
  fields.each_with_index{|f,i| return i if f =~ field_re}
  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
end

.file2md5(file) ⇒ Object



418
419
420
421
422
423
424
425
426
427
428
429
# File 'lib/rbbt/util/misc/inspect.rb', line 418

# Returns the MD5 hex digest of the content of +file+, cached next to it.
#
# When "<file>.md5" exists its content is returned without recomputing.
# Otherwise the digest is computed with Digest::MD5 (rather than by shelling
# out to md5sum with the file name interpolated into the command line, which
# breaks on names containing a single quote) and then written to
# "<file>.md5" on a best-effort basis.
def self.file2md5(file)
  md5_file = file + '.md5'
  return Open.read(md5_file) if File.exist?(md5_file)

  md5 = Digest::MD5.file(file).hexdigest
  begin
    Open.write(md5_file, md5)
  rescue
    # Deliberately ignored: the cache is optional (e.g. read-only directory)
  end
  md5
end

.fingerprint(obj) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/rbbt/util/misc/inspect.rb', line 45

# Builds a short, human-readable String describing +obj+, abbreviating large
# values. The branch is selected by the class of the object:
#
# * nil, true, false, Symbol: their literal form
# * String: quoted; above 100 characters only the first 30 and last 10
#   characters are kept, with the length and the first 5 digest characters
#   in between
# * Array: above 10 elements only the elements at positions 0, 1, middle,
#   -2 and -1 are shown, prefixed with the length
# * Hash: above 10 entries the keys and the values are fingerprinted as two
#   lists, otherwise every pair is shown
# * Float: formatted with 1, 3 or 6 decimals depending on its magnitude
# * Step, AnnotatedArray, TSV::Parser, TSV, NamedArray, IO, File: dedicated
#   formats (the project classes are only matched when they are defined)
# * anything else: obj.to_s
def self.fingerprint(obj)
  case obj
  when nil
    "nil"
  # `defined? X and X` evaluates to nil when X is not loaded, so the branch
  # can then only match a nil obj, which is already handled above
  when (defined? Step and Step)
    "<Step:"  << (obj.short_path || Misc.fingerprint([obj.task.name, obj.inputs])) << ">"
  when TrueClass
    "true"
  when FalseClass
    "false"
  when Symbol
    ":" << obj.to_s
  when String
    if obj.length > 100
      digest = Misc.digest(obj)
      "'" << obj.slice(0,30) << "<...#{obj.length} - #{digest[0..4]}...>" << obj.slice(-10,30)<< "'"
    else 
      "'" << obj << "'"
    end
  when (defined? AnnotatedArray and AnnotatedArray)
    "<A: #{fingerprint Annotated.purge(obj)} #{fingerprint obj.info}>"
  when (defined? TSV and TSV::Parser)
    filename = obj.filename
    # A random tag keeps different STDIN-backed parsers apart
    filename = "STDIN(#{rand})" if filename == '-'
    "<TSVStream:" + (filename || "NOFILENAME") + "--" << Misc.fingerprint(obj.options) << ">"
  when IO
    # NOTE(review): the `|| obj.inspect + rand(100000)` fallback would raise
    # TypeError (String + Integer), but obj.filename is already known to be
    # truthy on this side of the conditional, so it is never evaluated
    (obj.respond_to?(:filename) and obj.filename ) ? "<IO:" + (obj.filename || obj.inspect + rand(100000)) + ">" : obj.inspect
  when File
    "<File:" + obj.path + ">"
  when NamedArray
    fields = obj.fields
    fields = fields.collect if NamedArray === fields
    "[<NamedArray: fields=#{fingerprint fields} -- values=#{fingerprint obj[0..-1]}]"
  when Array
    if (length = obj.length) > 10
      "[#{length}--" <<  (obj.values_at(0,1, length / 2, -2, -1).collect{|e| fingerprint(e)} * ",") << "]"
    else
      "[" << (obj.collect{|e| fingerprint(e) } * ", ") << "]"
    end
  when (defined? TSV and TSV)
    obj.with_unnamed do
      "TSV:{"<< fingerprint(obj.all_fields|| []) << ";" << fingerprint(obj.keys) << "}"
    end
  when Hash
    if obj.length > 10
      "H:{"<< fingerprint(obj.keys) << ";" << fingerprint(obj.values) << "}"
    else
      new = "{"
      obj.each do |k,v|
        new << fingerprint(k) << '=>' << fingerprint(v) << ' '
      end
      # Replace the trailing space left by the last pair with the closing brace
      if new.length > 1
         new[-1] =  "}"
      else
        new << '}'
      end
      new
    end
  when Float
    if obj.abs > 10
      "%.1f" % obj
    elsif obj.abs > 1
      "%.3f" % obj
    else
      "%.6f" % obj
    end
  else
    obj.to_s
  end
end

.fixascii(string) ⇒ Object



152
153
154
155
156
157
158
# File 'lib/rbbt/util/misc/format.rb', line 152

# Re-encodes +string+ as ASCII-8BIT after passing it through fixutf8.
#
# Objects that do not respond to +encode+ are handed back untouched.
def self.fixascii(string)
  return string unless string.respond_to?(:encode)

  cleaned = self.fixutf8(string)
  cleaned.encode("ASCII-8BIT")
end

.fixutf8(string) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
# File 'lib/rbbt/util/misc/format.rb', line 164

# Returns +string+ as UTF-8 text.
#
# * +nil+ is returned as +nil+.
# * A string already tagged UTF-8 with a valid byte sequence is returned as is
#   (same object).
# * Anything else that responds to +encode+ is transcoded from 'binary' to
#   UTF-8 with invalid/undefined bytes replaced by the empty string.
# * Objects without +encode+ fall back to the legacy Iconv library.
def self.fixutf8(string)
  return nil if string.nil?

  already_valid_utf8 = string.respond_to?(:encoding) &&
                       string.encoding.to_s == "UTF-8" &&
                       string.respond_to?(:valid_encoding?) &&
                       string.valid_encoding?
  return string if already_valid_utf8

  # NOTE(review): core String has no +valid_encoding+ (without "?"), so this
  # only triggers for duck-typed objects that define it -- possibly a typo.
  return string if string.respond_to?(:valid_encoding) && string.valid_encoding

  unless string.respond_to?(:encode)
    require 'iconv'
    @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
    return @@ic.iconv(string)
  end

  string.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
end

.format_definition_list(defs, size = 80, indent = 20, color = :yellow, sep = "\n\n") ⇒ Object



76
77
78
79
80
81
82
83
# File 'lib/rbbt/util/misc/format.rb', line 76

# Renders every (term, definition) pair of +defs+ with
# format_definition_list_item and joins the rendered entries with +sep+.
#
# +size+, +indent+ and +color+ are forwarded unchanged to each item.
def self.format_definition_list(defs, size = 80, indent = 20, color = :yellow, sep = "\n\n")
  rendered = defs.collect do |term, definition|
    format_definition_list_item(term, definition, size, indent, color)
  end
  rendered * sep
end

.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'lib/rbbt/util/misc/format.rb', line 58

# Formats a single definition-list entry: the term +dt+ followed by its
# description +dd+ wrapped with format_paragraph.
#
# dt     - the term; colored with Log.color when +color+ is truthy.
# dd     - the description text; nil is treated as "".
# size   - width passed to format_paragraph.
# indent - when negative, the term goes on its own line and the description
#          is indented by indent.abs-1; otherwise the description is indented
#          by indent+1 and the term overwrites the start of the first line.
# color  - color passed to Log.color, or nil/false for no coloring.
#
# Returns the formatted text.
def self.format_definition_list_item(dt, dd, size = 80, indent = 20, color = :yellow)
  dd = "" if dd.nil?
  dt = Log.color color, dt if color
  # NOTE(review): dt is only stringified when there is a description; the
  # reason for that condition is not visible here -- confirm.
  dt = dt.to_s  unless dd.empty?
  # Visible length of the term, measured after Log.uncolor.
  len = Log.uncolor(dt).length

  if indent < 0
    text = format_paragraph(dd, size, indent.abs-1, 0)
    # `<<` appends in place, so dt itself is modified here.
    text = dt << "\n" << text
  else
    # When the term is longer than the indent, push the first line right by
    # the excess so the description does not collide with it.
    offset = len - indent
    offset = 0 if offset < 0
    text = format_paragraph(dd, size, indent.abs+1, offset)
    # Overwrite the first +len+ characters of the paragraph with the term.
    text[0..len-1] = dt
  end
  text
end

.format_paragraph(text, size = 80, indent = 0, offset = 0) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/rbbt/util/misc/format.rb', line 28

# Word-wraps +text+ into lines, indenting every line by +indent+ spaces.
#
# text   - the text to wrap.
# size   - base width; the effective limit used below is size + offset + indent.
# indent - number of spaces prepended to every produced line.
# offset - extra leading spaces for the very first line only (it is reset to
#          0 after the first line is emitted).
#
# Returns the re-joined text. Paragraph separators are kept verbatim.
def self.format_paragraph(text, size = 80, indent = 0, offset = 0)
  i = 0
  size = size + offset + indent
  # Split on blank-line separators, or on a newline that is followed by "*".
  # Because the pattern is wrapped in a capture group, String#split also
  # returns the separators, so chunks alternate paragraph / separator.
  re = /((?:\n\s*\n\s*)|(?:\n\s*(?=\*)))/
    text.split(re).collect do |paragraph|
    i += 1
    # Odd chunks are paragraphs to wrap; even chunks are separators.
    str = if i % 2 == 1
            # Collapse whitespace runs ("\s" in double quotes is a space).
            words = paragraph.gsub(/\s+/, "\s").split(" ")
            lines = []
            line = " "*offset
            word = words.shift
            while word
              # Truncate words that could never fit on a line and mark them with '...'.
              word = word[0..size-indent-offset-4] + '...' if word.length >= size - indent - offset
              # Fill the current line while the uncolored length still fits.
              while word and Log.uncolor(line).length + Log.uncolor(word).length <= size - indent
                line << word << " "
                word = words.shift
              end
              offset = 0
              # Drop the trailing space and prepend the indent.
              lines << ((" " * indent) << line[0..-2])
              line = ""
            end
            (lines * "\n")
          else
            paragraph
          end
    offset = 0
    str
    end*""
end

.format_seconds(time, extended = false) ⇒ Object



21
22
23
24
25
26
# File 'lib/rbbt/util/misc/format.rb', line 21

# Formats a duration given in seconds as "HH:MM:SS".
#
# When +extended+ is truthy the fractional part is appended as ".CC"
# (hundredths of a second, truncated).
def self.format_seconds(time, extended = false)
  whole = time.to_i
  fields = [whole / 3600, whole / 60 % 60, whole % 60]
  clock = fields.collect{|field| "%02i" % field } * ":"
  if extended
    hundredths = (time - whole) * 100
    clock << ".%02i" % hundredths
  end
  clock
end

.genomic_location_cmp(gpos1, gpos2, sep = ":") ⇒ Object



381
382
383
384
385
386
387
388
389
390
391
# File 'lib/rbbt/util/misc/omics.rb', line 381

# Compares two genomic positions of the form "chr<sep>pos".
#
# Chromosomes are compared with <=> as given; on a tie the positions are
# compared as integers. Returns the <=> result.
def self.genomic_location_cmp(gpos1, gpos2, sep = ":")
  chrom_a, _, position_a = gpos1.partition(sep)
  chrom_b, _, position_b = gpos2.partition(sep)

  by_chromosome = chrom_a <=> chrom_b
  return by_chromosome unless by_chromosome == 0

  position_a.to_i <=> position_b.to_i
end

.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":") ⇒ Object



405
406
407
408
409
410
411
412
413
414
415
# File 'lib/rbbt/util/misc/omics.rb', line 405

# Compares two "chr<sep>pos" positions, ordering chromosomes with
# chr_cmp_contigs against the supplied +contigs+ and breaking ties by the
# integer position.
def self.genomic_location_cmp_contigs(gpos1, gpos2, contigs, sep = ":")
  chrom_a, _, position_a = gpos1.partition(sep)
  chrom_b, _, position_b = gpos2.partition(sep)

  by_chromosome = chr_cmp_contigs(chrom_a, chrom_b, contigs)
  return by_chromosome unless by_chromosome == 0

  position_a.to_i <=> position_b.to_i
end

.genomic_location_cmp_strict(gpos1, gpos2, sep = ":") ⇒ Object



393
394
395
396
397
398
399
400
401
402
403
# File 'lib/rbbt/util/misc/omics.rb', line 393

# Compares two "chr<sep>pos" positions, ordering chromosomes with
# chr_cmp_strict and breaking ties by the integer position.
def self.genomic_location_cmp_strict(gpos1, gpos2, sep = ":")
  chrom_a, _, position_a = gpos1.partition(sep)
  chrom_b, _, position_b = gpos2.partition(sep)

  by_chromosome = chr_cmp_strict(chrom_a, chrom_b)
  return by_chromosome unless by_chromosome == 0

  position_a.to_i <=> position_b.to_i
end

.genomic_mutations_to_BED(mutations, chr_prefix = false, sort_order = :normal) ⇒ Object



524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
# File 'lib/rbbt/util/misc/omics.rb', line 524

# Streams +mutations+ ("chr:pos:change...") as tab-separated BED lines:
# chromosome, 0-based start, end and the original mutation string.
#
# chr_prefix - "true"/"add" prepends "chr" to chromosome names lacking it,
#              "remove" strips it; any other value leaves names untouched.
# sort_order - an Array of contig names (sorted with
#              sort_genomic_locations_by_contig), 'strict'
#              (sort_genomic_locations_strict) or anything else
#              (sort_genomic_locations).
def self.genomic_mutations_to_BED(mutations, chr_prefix = false, sort_order = :normal)
  prefix_mode = chr_prefix.to_s.downcase

  sorted = if Array === sort_order
             contigs = sort_order
             # Bring the contig names in line with the requested prefix policy,
             # judging by the first contig only.
             case prefix_mode
             when "remove"
               contigs = contigs.collect{|chr| chr.sub('chr', '') } if contigs.first.include?('chr')
             when "true", "add"
               contigs = contigs.collect{|chr| "chr" + chr } unless contigs.first.include?('chr')
             end
             sort_genomic_locations_by_contig(mutations, contigs)
           elsif sort_order.to_s == 'strict'
             sort_genomic_locations_strict(mutations)
           else
             sort_genomic_locations(mutations)
           end

  TSV.traverse sorted, :type => :array, :into => :stream do |mutation|
    chr, pos, mut, *_rest = mutation.split(":")

    # Span covered by the change: "+XYZ" counts one extra base, "-XYZ" counts
    # only the text after the sign, a missing change counts as one base.
    size = case mut
           when nil
             1
           when /^\+(.*)/
             1 + Regexp.last_match(1).length
           when /^\-(.*)/
             Regexp.last_match(1).length
           else
             mut.length
           end

    case prefix_mode
    when "true", "add"
      chr = "chr" + chr unless chr.include?('chr')
    when "remove"
      chr = chr.sub("chr", '') if chr.include?('chr')
    end

    start = pos.to_i - 1
    [chr, start, start + size, mutation] * "\t"
  end
end

.get_filename(obj) ⇒ Object



431
432
433
434
435
436
437
438
439
440
441
# File 'lib/rbbt/util/misc/inspect.rb', line 431

# Extracts a file name from +obj+.
#
# Tries, in order: obj.filename, obj.path, and finally +obj+ itself when it is
# a Path or a String accepted by Misc.is_filename?. Returns nil otherwise.
def self.get_filename(obj)
  return obj.filename if obj.respond_to?(:filename)
  return obj.path if obj.respond_to?(:path)
  return obj if Path === obj || (String === obj && Misc.is_filename?(obj))

  nil
end

.GET_params2hash(string) ⇒ Object



54
55
56
57
58
59
60
61
# File 'lib/rbbt/util/misc/options.rb', line 54

# Parses a URL query string ("a=1&b=two") into a Hash of String keys to
# CGI-unescaped String values.
#
# * Each item is split on the FIRST "=" only, so values that contain "="
#   (for instance base64 padding) are preserved in full.
# * Items without a value map to "".
# * Empty items (as produced by "a=1&&b=2") are ignored.
#
# Keys are used verbatim (they are not unescaped).
def self.GET_params2hash(string)
  string.split('&').each_with_object({}) do |item, hash|
    next if item.empty?

    key, value = item.split("=", 2)
    hash[key] = value.nil? ? "" : CGI.unescape(value)
  end
end

.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/rbbt/util/misc/math.rb', line 106

# Builds a Google Charts URL for a three-set Venn diagram.
#
# list1..list3 - the three lists to compare (intersected with &).
# name1..name3 - legend names, defaulting to "list 1".."list 3".
# total        - optional universe size (an Array is replaced by its length);
#                when given it is used to scale the circles and is shown in
#                the legend.
#
# Returns the URL as a String.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  regions = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3]
  counts = regions.collect{|region| region.length }
  n1, n2, n3, n12, n13, n23, n123 = counts

  total = total.length if Array === total

  legend = [
    "#{name1}: #{n1} (#{name2}: #{n12}, #{name3}: #{n13})",
    "#{name2}: #{n2} (#{name1}: #{n12}, #{name3}: #{n23})",
    "#{name3}: #{n3} (#{name1}: #{n13}, #{name2}: #{n23})",
    total ? " INTERSECTION: #{n123} TOTAL: #{total}" : " INTERSECTION: #{n123}"
  ] * "|"

  # Scale every count to a fraction of the largest value, truncated to two decimals.
  scale = total || counts.max
  fractions = counts.collect{|v| (v.to_f/scale * 100).to_i.to_f / 100}

  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{fractions * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{legend}"
end

.gzip(tarfile) ⇒ Object

gzips the underlying string in the given StringIO, returning a new StringIO representing the compressed file.



66
67
68
69
70
71
72
73
74
75
# File 'lib/rbbt/util/tar.rb', line 66

# Compresses the string held by the StringIO +tarfile+ with gzip.
#
# Returns a fresh StringIO, positioned at the start, over the compressed data.
def self.gzip(tarfile)
  compressed = StringIO.new("".dup)
  writer = Zlib::GzipWriter.new(compressed)
  writer.write(tarfile.string)
  # Closing the writer is what emits the gzip footer; it also closes the
  # underlying StringIO, hence the new StringIO returned below.
  writer.close

  StringIO.new(compressed.string)
end

.hash2GET_params(hash) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/rbbt/util/misc/options.rb', line 63

# Serializes +hash+ as a URL query string ("k1=v1&k2=v2"), with entries
# ordered by the string form of their keys.
#
# Only values whose class name is in the whitelist below are emitted (so nil
# values, hashes, etc. are skipped). Symbols are written as their name,
# Arrays are joined with ",", and everything else is CGI-escaped with "/"
# left readable.
def self.hash2GET_params(hash)
  allowed_classes = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object Array)

  pairs = hash.sort_by{|k,v| k.to_s }.collect do |k, v|
    next unless allowed_classes.include?(v.class.to_s)

    encoded = if Symbol === v
                v.to_s
              elsif Array === v
                v * ","
              else
                CGI.escape(v.to_s).gsub('%2F','/')
              end
    name = Symbol === k ? k.to_s : k
    [name, encoded] * "="
  end

  pairs.compact * "&"
end

.hash2md5(hash) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/rbbt/util/misc/inspect.rb', line 187

# Computes a digest string that summarizes the content of +hash+.
#
# Returns "" for a nil or empty hash, or when nothing was accumulated;
# otherwise the result of digest() over a string built from the entries,
# visited in order of the string form of their keys.
#
# The keys :monitor / "monitor" and :in_situ_persistence /
# "in_situ_persistence" are ignored. Long strings and arrays are truncated
# using HASH2MD5_MAX_STRING_LENGTH / HASH2MD5_MAX_ARRAY_LENGTH (defined
# elsewhere) with their full length appended.
def self.hash2md5(hash)
  return "" if hash.nil? or hash.empty?

  str = ""
  keys = hash.keys
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
  keys = keys.sort_by{|k| k.to_s}

  # Temporarily switch the hash to unnamed mode (when it supports it); the
  # previous setting is restored after the loop.
  if hash.respond_to? :unnamed
    unnamed = hash.unnamed
    hash.unnamed = true 
  end


  keys.each do |k|
    next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
    # Keep the original value/key around; the purged copies are what gets serialized.
    _v = hash[k]
    _k = k
    v = TSV === _v ? _v : Annotated.purge(_v)
    k = Annotated.purge(k)

    case
    when TrueClass === v
      str << k.to_s << "=>true" 
    when FalseClass === v
      str << k.to_s << "=>false" 
    when TSV === v
      str << k.to_s << "=>" << obj2md5(v)
    when Hash === v
      # Nested hashes are summarized recursively.
      str << k.to_s << "=>" << hash2md5(v)
    when Symbol === v
      str << k.to_s << "=>" << v.to_s
    when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
      #str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << v[v.length-3..v.length+3] << v[-3..-1] << "; #{ v.length }"
      str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
    when String === v
      str << k.to_s << "=>" << v
    when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
      #str << k.to_s << "=>[" << (v[0..HASH2MD5_MAX_ARRAY_LENGTH] + v[v.length-3..v.length+3] + v[-3..-1]) * "," << "; #{ v.length }]"
      str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
    when TSV::Parser === v
      # NOTE(review): unlike the other branches the key is not written here.
      str << remove_long_items(v)
    when Array === v
      str << k.to_s << "=>[" << v * "," << "]"
    when File === v
      str << k.to_s << "=>[File:" << v.path << "]"
    else
      begin
        v_ins = v.inspect
      rescue
        v_ins = "#Object:" << v.object_id.to_s
      end

      case
      when v_ins =~ /:0x0/
        # Drop the first ":0x...@" fragment of the inspect output
        # (presumably a memory address that changes between runs).
        str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
      else
        str << k.to_s << "=>" << v_ins
      end
    end

    # Annotated values (other than AssociationItem) also contribute their info hash.
    if _v and defined? Annotated and Annotated === _v and not (defined? AssociationItem and AssociationItem === _v)
      info = _v.info
      info = Annotated.purge(info)
      str << "_" << hash2md5(info) 
    end
  end
  hash.unnamed = unnamed if hash.respond_to? :unnamed

  if str.empty?
    ""
  else
    digest(str)
  end
end

.hash2string(hash) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/rbbt/util/misc/options.rb', line 46

# Serializes +hash+ as "key=value" pairs joined by "#", ordered by the string
# form of the keys.
#
# Only values whose class name is in the whitelist below are kept. Symbols
# (keys or values) are written with a leading ":"; everything else is written
# with to_s and chomped.
def self.hash2string(hash)
  simple_classes = %w(Symbol String Float Fixnum Integer Numeric TrueClass FalseClass Module Class Object)

  pairs = hash.sort_by{|k,v| k.to_s }.collect do |k, v|
    next unless simple_classes.include?(v.class.to_s)

    key_text = Symbol === k ? ":#{k}" : k.to_s.chomp
    value_text = Symbol === v ? ":#{v}" : v.to_s.chomp
    [key_text, value_text] * "="
  end

  pairs.compact * "#"
end

.hash_to_html_tag_attributes(hash) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/rbbt/util/misc/options.rb', line 86

# Renders +hash+ as a string of HTML attributes: key='value' pairs separated
# by spaces.
#
# Entries with a nil key, a nil value or an empty String value are dropped.
# Arrays are joined with spaces; Strings, Symbols, true and Numerics are
# written as text; any other value (including false) is dropped.
# Values are not escaped.
def self.hash_to_html_tag_attributes(hash)
  return "" if hash.nil? or hash.empty?

  attributes = hash.collect do |k, v|
    next if k.nil? or v.nil? or (String === v and v.empty?)

    text = case v
           when Array
             v * " "
           when String
             v
           when Symbol, TrueClass, Numeric
             v.to_s
           end
    next if text.nil?

    [k, "'" + text + "'"] * "="
  end

  attributes.compact * " "
end

.hostname ⇒ Object



5
6
7
# File 'lib/rbbt/util/misc/system.rb', line 5

# Returns the machine's host name, as reported by the `hostname` command,
# with surrounding whitespace stripped.
#
# The command is run once; the result is memoized in @hostname (the memo
# variable was previously misspelled @hostanem).
def self.hostname
  @hostname ||= `hostname`.strip
end

.html_tag(tag, content = nil, params = {}) ⇒ Object



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rbbt/util/misc/options.rb', line 108

# Builds an HTML element as a String.
#
# tag     - element name.
# content - inner content; nil produces a self-closing tag.
# params  - attributes, rendered with hash_to_html_tag_attributes.
def self.html_tag(tag, content = nil, params = {})
  attr_str = hash_to_html_tag_attributes(params)
  # Separate the attributes from the tag name only when there are any.
  attr_str = " " << attr_str if String === attr_str and attr_str != ""

  return "<#{ tag }#{attr_str}/>" if content.nil?

  "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
end

.humanize(value, options = {}) ⇒ Object

source: gist.github.com/ekdevdes/2450285 author: Ethan Kramer (github.com/ekdevdes)



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/rbbt/util/misc/format.rb', line 110

# Turns an underscore_separated identifier into human readable text.
#
# value   - anything responding to to_s.
# options - Hash; :format selects the output style:
#             :sentence (used when options is empty) "Hello world"
#             :allcaps                              "Hello World"
#             :class                                "HelloWorld"
#             :nocaps                               "hello world"
#
# Words that look like acronyms (a letter followed by an uppercase letter) keep
# their case when lower-casing, and when capitalizing the first word of a
# sentence.
#
# Returns the formatted String, or nil when :format is not one of the above.
#
# Compared to the previous implementation this no longer raises on an empty
# value (it returns ""), and it no longer writes :format into the caller's
# options hash.
def self.humanize(value, options = {})
  style = options.empty? ? :sentence : options[:format]
  acronym = /[a-zA-Z][A-Z]/

  # Lower-case every word, except for acronyms (Miguel Vazquez edit)
  words = value.to_s.split('_').collect do |word|
    word.match(acronym) ? word : word.downcase
  end

  case style
  when :allcaps
    words.collect{|word| word.capitalize } * " "
  when :class
    words.collect{|word| word.capitalize } * ""
  when :sentence
    first, *rest = words
    return "" if first.nil?

    first = first.capitalize unless first.match(acronym)
    ([first] + rest) * " "
  when :nocaps
    words * " "
  end
end

.humanize_list(list) ⇒ Object



178
179
180
181
182
183
184
185
# File 'lib/rbbt/util/misc/format.rb', line 178

# Joins +list+ into an English enumeration: "a, b and c".
#
# An empty list gives ""; a single-element list returns that element itself
# (not converted to a String).
def self.humanize_list(list)
  return "" if list.empty?
  return list.first if list.length == 1

  leading = list[0..-2].collect{|e| e.to_s }
  "#{leading * ", "} and #{list[-1]}"
end

.in_delta?(a, b, delta = 0.0001) ⇒ Boolean

Returns:

  • (Boolean)


129
130
131
# File 'lib/rbbt/util/misc/math.rb', line 129

# True when +a+ and +b+ (converted with to_f) differ by strictly less than
# +delta+.
def self.in_delta?(a, b, delta = 0.0001)
  difference = (a.to_f - b.to_f).abs
  difference < delta
end

.in_dir(dir) ⇒ Object

WARN: probably not thread safe…



83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/rbbt/util/misc/system.rb', line 83

# Runs the given block with +dir+ as the working directory, creating the
# directory first if it does not exist, and always changes back to the
# previous working directory afterwards.
#
# Returns the value of the block.
def self.in_dir(dir)
  previous_dir = FileUtils.pwd
  begin
    FileUtils.mkdir_p dir unless File.exist?(dir)
    FileUtils.cd dir
    yield
  ensure
    FileUtils.cd previous_dir
  end
end

.index_BED(source, destination, sorted = false) ⇒ Object



479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
# File 'lib/rbbt/util/misc/omics.rb', line 479

# Builds a sharded index for the BED data in +source+, stored at
# +destination+, or reopens the index when +destination+ already exists.
#
# source      - an IO with BED lines, or anything Open.open can open.
# destination - location of the index (a Path is resolved with +find+).
# sorted      - currently unused.
#
# Index keys have the form "chr:start:end" (with any 'chr' prefix removed from
# the chromosome) and map to the fourth BED column; entries are sharded by
# chromosome.
#
# Returns the Persist::Sharder.
def self.index_BED(source, destination, sorted = false)

  # Extracts [start, end] as integers from a "chr:start:end" key.
  pos_function = Proc.new do |k|
    k.split(":").values_at(1, 2).collect{|i| i.to_i}
  end

  if Open.exists? destination
    Persist::Sharder.new destination, false, "fwt", :pos_function => pos_function  do |key|
      key.split(":")[0]
    end
  else
    # Use +source+ directly when it already is an IO. (This used to test the
    # not-yet-assigned local +io+, so the check could never succeed.)
    io = IO === source ? source : Open.open(source)

    # First pass: normalize chromosome names, keep only the first four
    # columns and record the longest id, which sizes the stored values.
    max_size = 0
    nio = Misc.open_pipe do |sin|
      while line = io.gets
        chr, start, eend, id, *rest = line.chomp.split("\t")
        l = id.length
        max_size = l if max_size < l
        chr = chr.sub('chr','')
        sin << [chr, start, eend, id] * "\t" << "\n"
      end
    end

    TmpFile.with_file do |tmpfile|
      Misc.consume_stream(nio, false, tmpfile)

      value_size = max_size
      destination = destination.find if Path === destination
      sharder = Persist::Sharder.new destination, true, "fwt", :value_size => value_size, :range => true, :pos_function => pos_function  do |key|
        key.split(":")[0]
      end

      # Second pass: load the normalized lines into the sharded index.
      TSV.traverse tmpfile, :type => :array, :bar => "Creating BED index for #{Misc.fingerprint source}" do |line|
        next if line.empty?
        chr, start, eend, id, *rest = line.split("\t")
        key = [chr, start, eend] * ":"
        sharder[key] = id
      end
      sharder.read

      sharder
    end
  end
end

.insist(times = 4, sleep = nil, msg = nil) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/rbbt/util/misc/development.rb', line 137

# Runs the given block, retrying it when it raises an exception.
#
# times - maximum number of attempts, or an Array of sleep intervals; in that
#         case the number of attempts is the Array's length and the intervals
#         are consumed (shifted off that same Array) as retries happen.
# sleep - seconds to wait between attempts; when nil a default schedule of
#         increasing intervals is built on the first failure.
# msg   - extra text for the warning logged on each failure; pass false to
#         suppress that warning.
#
# Special cases:
# * TryAgain       - sleep and retry without counting the attempt.
# * StopInsist     - re-raised immediately via $!.exception.
# * Aborted, Interrupt - logged and re-raised immediately.
#
# Returns the value of the block; re-raises the last exception once the
# attempts are exhausted.
#
# Note: the +sleep+ parameter shadows Kernel#sleep as a local name, so
# `sleep sleep` below calls the method with the interval.
def self.insist(times = 4, sleep = nil, msg = nil)
  sleep_array = nil

  try = 0
  begin
    begin
      yield
    rescue Exception
      # On failure, normalize +times+/+sleep+ before the outer handlers run.
      if Array === times
        sleep_array = times
        times = sleep_array.length
        sleep = sleep_array.shift
      end

      if sleep.nil?
        # Default schedule: sorted intervals, cut to +times+ entries.
        sleep_array = ([0] + [0.001, 0.01, 0.1, 0.5] * (times / 3)).sort[0..times-1]
        sleep = sleep_array.shift
      end
      # Re-raise so the outer rescue clauses can classify the exception.
      raise $!
    end
  rescue TryAgain
    sleep sleep
    retry
  rescue StopInsist
    raise $!.exception
  rescue Aborted, Interrupt
    if msg
      Log.warn("Not Insisting after Aborted: #{$!.message} -- #{msg}")
    else
      Log.warn("Not Insisting after Aborted: #{$!.message}")
    end
    raise $!
  rescue Exception
    Log.exception $! if ENV["RBBT_LOG_INSIST"] == 'true'
    if msg
      Log.warn("Insisting after exception: #{$!.class} #{$!.message} -- #{msg}")
    elsif FalseClass === msg
      nil
    else
      Log.warn("Insisting after exception:  #{$!.class} #{$!.message}")
    end

    # The first retry is immediate (only yields to other threads); later
    # retries wait and then advance to the next interval, if there is one.
    if sleep and try > 0
      sleep sleep
      sleep = sleep_array.shift || sleep if sleep_array
    else
      Thread.pass
    end

    try += 1
    retry if try < times
    raise $!
  end
end

.intercalate_streams(streams) ⇒ Object



664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
# File 'lib/rbbt/util/misc/pipes.rb', line 664

# Interleaves the lines of several streams: one line from each stream that
# still has data, round after round, written with puts to the pipe opened by
# Misc.open_pipe.
#
# Once every stream is exhausted each one is joined (when it responds to
# +join+) and closed (when it responds to +close+ and is not closed yet).
#
# Returns whatever Misc.open_pipe returns.
def self.intercalate_streams(streams)
  Misc.open_pipe do |sin|
    while true
      round = streams.collect{|stream| stream.eof? ? nil : stream.gets }.compact
      break if round.empty?

      round.each{|line| sin.puts line }
    end

    streams.each do |stream|
      stream.join if stream.respond_to? :join
      stream.close if stream.respond_to? :close and not stream.closed?
    end
  end
end

.intersect_sorted_arrays(a1, a2) ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/util/misc/manipulation.rb', line 60

# Intersects two sorted arrays with a single merge-style pass.
#
# WARNING: both arrays are consumed (elements are removed with +shift+).
# A nil element ends the scan.
#
# Returns the Array of common elements, in order.
def self.intersect_sorted_arrays(a1, a2)
  common = []
  left, right = a1.shift, a2.shift

  until left.nil? or right.nil?
    case left <=> right
    when 0
      common << left
      left, right = a1.shift, a2.shift
    when -1
      # Advance the left side until it catches up with the right one.
      left = a1.shift while not left.nil? and left < right
    when 1
      right = a2.shift
      right = a2.shift while not right.nil? and right < left
    end
  end

  common
end

.intersect_streams(f1, f2, out, sep = ":") ⇒ Object



421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
# File 'lib/rbbt/util/misc/omics.rb', line 421

# Writes to +out+ every pair of overlapping ranges found in the streams +f1+
# and +f2+, as the two original lines joined by a tab.
#
# Each line is parsed with intersect_streams_read (chromosome, start, end,
# separated by +sep+). The algorithm advances through both streams in a single
# sweep, so it presumably expects both to be sorted by chromosome and
# position -- confirm with callers. +f2+ must support +pos+ and +seek+.
#
# Returns nil right away if either stream is empty.
def self.intersect_streams(f1, f2, out, sep=":")
  finish = false
  return if f1.eof? or f2.eof?
  line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
  line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
  while not finish
    # Decide which stream to advance: the one that is behind.
    cmp = intersect_streams_cmp_chr(chr1,chr2)
    case cmp
    when -1
      move = 1
    when 1
      move = 2
    else
      if eend1 < start2
        move = 1
      elsif eend2 < start1
        move = 2
      else
        # Overlap: remember where f2 is, scan ahead in f2 reporting every
        # entry that overlaps the current f1 entry, then rewind f2 so the
        # next f1 entry is compared against the same f2 entries.
        pos2 = f2.pos

        sline2, schr2, sstart2, seend2, srest2 = line2, chr2, start2, eend2, rest2
        while chr1 == chr2 and eend1 >= start2
          out.puts line1 + "\t" + line2 if start1 <= eend2
          if f2.eof?
            # Sentinel value that ends the scan-ahead loop.
            chr2 = 'next2'
          else
            line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
          end
        end
        line2, chr2, start2, eend2, rest2 = sline2, schr2, sstart2, seend2, srest2
        f2.seek(pos2)
        move = 1
      end
    end

    # Advance the chosen stream; running out of either one ends the sweep.
    case move
    when 1
      if f1.eof?
        finish = true
      else
        line1, chr1, start1, eend1, rest1 = intersect_streams_read(f1,sep)
      end
    when 2
      if f2.eof?
        finish = true
      else
        line2, chr2, start2, eend2, rest2 = intersect_streams_read(f2,sep)
      end
    end
  end
end

.intersect_streams_cmp_chr(chr1, chr2) ⇒ Object



417
418
419
# File 'lib/rbbt/util/misc/omics.rb', line 417

# Chromosome ordering used by intersect_streams: the plain <=> of the two
# chromosome names.
def self.intersect_streams_cmp_chr(chr1, chr2)
  chr1.<=>(chr2)
end

.intersect_streams_read(io, sep = ":") ⇒ Object



350
351
352
353
354
355
356
357
358
359
360
361
# File 'lib/rbbt/util/misc/omics.rb', line 350

# Reads the next line of +io+ and parses it as a genomic range.
#
# The line is split on +sep+ into chromosome, start, end and the remaining
# fields. When the end field is not numeric (optionally followed by a tab and
# more text) the start is used as end.
#
# Returns [line, chr, start, end, rest] with start and end as Integers.
def self.intersect_streams_read(io, sep=":")
  line = io.gets.chomp
  chr, start, eend, *rest = line.split(sep, -1)
  start = start.to_i
  eend = (eend =~ /^\d+(\t.*)?$/) ? eend.to_i : start.to_i
  [line, chr, start, eend, rest]
end

.is_filename?(string, need_to_exists = true) ⇒ Boolean Also known as: filename?

Returns:

  • (Boolean)


96
97
98
99
100
101
102
# File 'lib/rbbt/util/misc/system.rb', line 96

# Heuristic check of whether +string+ can be a file name.
#
# * nil is never a file name.
# * A Path (when that constant is defined), or any object responding to
#   +exists+, always is.
# * A String qualifies when it has no newline, none of its "/"-separated
#   components is longer than 265 characters and, unless +need_to_exists+ is
#   false, the file exists.
def self.is_filename?(string, need_to_exists = true)
  return false if string.nil?
  return true if defined? Path and Path === string
  return true if string.respond_to? :exists

  return false unless String === string
  return false if string.include?("\n")
  return false if string.split("/").any?{|part| part.length > 265 }

  ! need_to_exists || File.exist?(string)
end

.IUPAC_to_base(iupac) ⇒ Object



285
286
287
# File 'lib/rbbt/util/misc/omics.rb', line 285

# Looks up the bases an IUPAC nucleotide code stands for in IUPAC2BASE.
#
# Returns an Array of base letters, or nil for an unknown code.
def self.IUPAC_to_base(iupac)
  IUPAC2BASE.fetch(iupac, nil)
end

.line_monitor_stream(stream, &block) ⇒ Object



761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
# File 'lib/rbbt/util/misc/pipes.rb', line 761

# Calls +block+ with every line of +stream+ from a background thread, while
# returning a second copy of the stream for normal consumption.
#
# The stream is duplicated with tee_stream: one copy (+monitor+) is read line
# by line in a new thread, the other (+out+) is returned, set up as a
# ConcurrentStream that carries the monitoring thread.
def self.line_monitor_stream(stream, &block)
  monitor, out = tee_stream stream
  monitor_thread = Thread.new do
    begin
      while line = monitor.gets
        block.call line
      end
    rescue
      # On failure: log, pass the error to both copies of the stream and
      # close the monitored one.
      Log.exception $!
      monitor.raise $!
      monitor.close unless monitor.closed?
      monitor.join if monitor.respond_to?(:join) && ! monitor.aborted?
      out.raise $! if out.respond_to?(:raise)
    ensure
      monitor.close unless monitor.closed?
      monitor.join if monitor.respond_to?(:join) && ! monitor.aborted?
    end
  end

  # Carry over the annotations of the original stream, when it supports it.
  stream.annotate out if stream.respond_to? :annotate
  ConcurrentStream.setup out, :threads => monitor_thread
end

.lock(file, unlock = true, options = {}) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/rbbt/util/misc/lock.rb', line 21

# Runs the given block while holding a lock file, yielding the Lockfile.
#
# file    - the file being protected; by default the lock is "<file>.lock".
#           When nil (and no Lockfile is supplied) the block is simply run.
# unlock  - whether to release the lock after the block. A Hash given in
#           this position is taken as +options+.
# options - passed to Lockfile.new; options[:lock] may be:
#             a Lockfile     - used as is (locked if not locked already),
#             false          - no locking at all,
#             a Path/String  - the lock file to use (resolved with +find+).
#
# A block that raises KeepLocked keeps the lock held; the payload of that
# exception becomes the return value.
#
# Returns the value of the block. Raises LockInterrupted when acquiring the
# lock is interrupted by Aborted or Interrupt.
def self.lock(file, unlock = true, options = {})
  unlock, options = true, unlock if Hash === unlock
  return yield if file.nil? and not Lockfile === options[:lock]

  file = file.find if Path === file
  # Make sure the directory holding the file exists.
  FileUtils.mkdir_p File.dirname(File.expand_path(file)) unless File.exist? File.dirname(File.expand_path(file))


  begin
    case options[:lock]
    when Lockfile
      lockfile = options[:lock]
      lockfile.lock unless lockfile.locked?
    when FalseClass
      # Locking disabled: nothing to acquire, nothing to release.
      lockfile = nil
      unlock = false
    when Path, String
      lock_path = options[:lock].find
      lockfile = Lockfile.new(lock_path, options)
      lockfile.lock
    else
      lock_path = File.expand_path(file + '.lock')
      lockfile = Lockfile.new(lock_path, options)
      lockfile.lock
    end
  rescue Aborted, Interrupt
    raise LockInterrupted
  end

  res = nil

  begin
    res = yield lockfile
  rescue KeepLocked
    unlock = false
    res = $!.payload
  ensure
    if unlock 
      # Failures while unlocking are logged, not raised.
      begin
        if lockfile.locked?
          lockfile.unlock 
        else
        end
      rescue Exception
        Log.warn "Exception unlocking: #{lockfile.path}"
        Log.exception $!
      end
    end
  end

  res
end

.lock_in_repo(repo, key, *args) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/rbbt/util/misc/lock.rb', line 76

# Runs the given block while holding a lock entry stored inside +repo+.
#
# repo  - key/value store supporting [], []=, include? and delete.
# key   - name of the resource; the lock entry is stored under "lock-<key>".
# *args - extra arguments passed on to the block.
#
# The block is yielded the lock key followed by +args+. When +repo+ or +key+
# is nil no lock is taken and the block is yielded nil in place of the lock
# key (this path used to reference an undefined variable and raise NameError).
#
# A lock entry left behind by a process of this host that no longer exists is
# removed. Otherwise the call waits, polling once per second, until the entry
# disappears. The entry is always removed when the block finishes, even if it
# raises.
#
# Lock entries are serialized with LOCK_REPO_SERIALIZER, so the repository
# must only contain trusted data.
#
# Returns the value of the block.
def self.lock_in_repo(repo, key, *args)
  return yield(nil, *args) if repo.nil? or key.nil?

  lock_key = "lock-" << key

  begin
    if repo[lock_key]
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])
      # Entries are written below with the symbol keys :hostname and :pid; the
      # string keys previously looked up here are still accepted.
      host = info["host"] || info["hostname"] || info[:host] || info[:hostname]
      pid = info["pid"] || info[:pid]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.delete lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.delete lock_key if repo.include? lock_key
  end

  sleep 1 while repo[lock_key]

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})

  begin
    yield lock_key, *args
  ensure
    repo.delete lock_key
  end
end

.log10(x) ⇒ Object



10
11
12
# File 'lib/rbbt/util/misc/math.rb', line 10

# Base-10 logarithm of +x+: the natural logarithm scaled by Log10Multiplier.
def self.log10(x)
  natural = Math.log(x)
  natural * Log10Multiplier
end

.log2(x) ⇒ Object



6
7
8
# File 'lib/rbbt/util/misc/math.rb', line 6

# Base-2 logarithm of +x+: the natural logarithm scaled by Log2Multiplier.
def self.log2(x)
  natural = Math.log(x)
  natural * Log2Multiplier
end

.match_fields(field1, field2) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
# File 'lib/rbbt/util/misc/objects.rb', line 3

# Tells whether two field names refer to the same thing.
#
# Names match when they are equal, or when they are equal once each name
# containing a parenthesized part is replaced by the text inside the
# parentheses.
def self.match_fields(field1, field2)
  return true if field1 == field2

  core = lambda do |field|
    m = field.match(/\((.*)\)/)
    m ? m[1] : field
  end

  core.call(field1) == core.call(field2)
end

.match_value(value, condition) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/rbbt/util/misc.rb', line 41

# Tests +value+ against +condition+.
#
# condition may be:
# * a String    - first converted with _convert_match_condition (after strip);
#                 if it is still a String, numbers are compared as floats and
#                 anything else with ==.
# * a Regexp    - true when +value+ matches it.
# * nil or true - true when +value+ is true or the string "true" (any case).
# * false       - true when +value+ is false or the string "false" (any case).
# * a Numeric   - compared as floats.
# * an Array    - [:cmp, operator, operand] sends +operator+ to value.to_f;
#                 [:invert, condition] negates the nested condition;
#                 any other Array matches when any of its elements matches.
#
# Raises a RuntimeError for any other kind of condition.
#
# The boolean checks used to be written `value === TrueClass`, which compares
# the value with the class itself and so never accepted real booleans.
def self.match_value(value, condition)
  condition = _convert_match_condition(condition.strip) if String === condition

  case condition
  when Regexp
    !! value.match(condition)
  when NilClass, TrueClass
    TrueClass === value or (String === value and value.downcase == 'true')
  when FalseClass
    FalseClass === value or (String === value and value.downcase == 'false')
  when String
    Numeric === value ? value.to_f == condition.to_f : value == condition
  when Numeric
    value.to_f == condition.to_f
  when Array
    case condition.first
    when :cmp
      value.to_f.send(condition[1], condition[2])
    when :invert
      ! match_value(value, condition[1] )
    else
      condition.any?{|e| match_value(value, e) }
    end
  else
    raise "Condition not understood: #{Misc.fingerprint condition}"
  end
end

.max(list) ⇒ Object



14
15
16
17
18
19
20
21
# File 'lib/rbbt/util/misc/math.rb', line 14

# Largest element of +list+, ignoring nils.
#
# Elements are compared with >. Returns nil when there is nothing to compare.
def self.max(list)
  present = list.reject{|v| v.nil? }
  present.inject{|largest, v| v > largest ? v : largest }
end

.mean(list) ⇒ Object



45
46
47
# File 'lib/rbbt/util/misc/math.rb', line 45

# Arithmetic mean of the non-nil elements of +list+ (converted with to_f),
# computed with the sibling +sum+ helper.
def self.mean(list)
  present = list.compact
  total = sum(present.collect{|v| v.to_f })
  total / present.length
end

.median(array) ⇒ Object



49
50
51
52
53
# File 'lib/rbbt/util/misc/math.rb', line 49

# Median of +array+: the middle element of the sorted values, or the average
# of the two middle elements when the count is even. Always a Float.
def self.median(array)
  ordered = array.sort
  count = ordered.length
  lower_middle = ordered[(count - 1) / 2]
  upper_middle = ordered[count / 2]
  (lower_middle + upper_middle).to_f / 2
end

.memory_use(pid = nil) ⇒ Object



399
400
401
# File 'lib/rbbt/util/misc/development.rb', line 399

# Memory reported by `ps -o rss` for process +pid+ (the current process when
# +pid+ is nil), as an Integer taken from the last token of the output.
def self.memory_use(pid=nil)
  target = pid || $$
  report = `ps -o rss -p #{target}`
  report.strip.split.last.to_i
end

.memprof ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/rbbt/util/misc/development.rb', line 110

# Runs the given block under Memprof and prints the collected statistics.
#
# The profiler is always stopped and its stats printed, even when the block
# raises; in that case "Profiling aborted" is printed and the exception is
# re-raised.
#
# Returns the value of the block.
def self.memprof
  require 'memprof'
  Memprof.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    Memprof.stop
    print Memprof.stats
  end
end

.merge_sorted_arrays(a1, a2) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rbbt/util/misc/manipulation.rb', line 79

# Merges two sorted arrays into one sorted array.
#
# An element present at the same point in both arrays is emitted only once.
# WARNING: the inputs are consumed with +shift+ while merging (whatever is
# left in the longer one is appended, not removed).
#
# Returns the merged Array.
def self.merge_sorted_arrays(a1, a2)
  merged = []
  head1, head2 = a1.shift, a2.shift

  while head1 and head2
    case head1 <=> head2
    when 0
      merged << head1
      head1, head2 = a1.shift, a2.shift
    when -1
      merged << head1
      head1 = a1.shift
    when 1
      merged << head2
      head2 = a2.shift
    end
  end

  # At most one side still has a pending element: flush it and its remainder.
  if head2
    merged << head2
    merged.concat a2
  elsif head1
    merged << head1
    merged.concat a1
  end

  merged
end

.min(list) ⇒ Object



23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/math.rb', line 23

# Smallest element of +list+, ignoring nils.
#
# Elements are compared with <. Returns nil when there is nothing to compare.
def self.min(list)
  present = list.reject{|v| v.nil? }
  present.inject{|smallest, v| v < smallest ? v : smallest }
end

.mtime_str(path) ⇒ Object



267
268
269
270
271
272
273
274
# File 'lib/rbbt/util/misc/inspect.rb', line 267

# Describes the modification time of +path+ as "mtime: <time>", or
# "mtime: not present" when the file does not exist.
#
# A Path is first resolved with +find+.
def self.mtime_str(path)
  path = path.find if Path === path
  return "mtime: not present" unless File.exist? path

  "mtime: #{File.mtime(path)}"
end

.name2basename(file) ⇒ Object



24
25
26
# File 'lib/rbbt/util/misc/inspect.rb', line 24

# Turns a name into something usable as a file basename: "/" becomes ">" and
# "~" becomes "-", and the result is passed through sanitize_filename.
def self.name2basename(file)
  flattened = file.tr("/~", ">-")
  sanitize_filename(flattened)
end

.notify(description, event = 'notification', key = nil) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/rbbt/util/misc/communication.rb', line 15

# Sends a Pushbullet "note" push in a background thread.
#
# description - body of the note.
# event       - title of the note (defaults to 'notification').
# key         - Pushbullet access token; falls back to PUSHBULLET_KEY.
#
# Returns the Thread running the request (its value is the output of curl),
# or nil, after logging a warning, when no key is available.
#
# The request body is generated with JSON and curl is invoked with an
# argument list, without going through a shell, so quotes or shell
# metacharacters in the description, event or key can neither break the
# payload nor run commands.
def self.notify(description, event='notification', key = nil)
  if PUSHBULLET_KEY.nil? and key.nil?
    Log.warn "Could not notify, no PUSHBULLET_KEY"
    return
  end

  require 'json'

  Thread.new do
    event ||= 'notification'
    key ||= PUSHBULLET_KEY

    payload = JSON.generate("type" => "note", "title" => event.to_s, "body" => description.to_s)
    command = [
      "curl", "-s",
      "--header", "Authorization: Bearer #{key}",
      "-X", "POST", "https://api.pushbullet.com/v2/pushes",
      "--header", "Content-Type: application/json",
      "--data-binary", payload
    ]

    IO.popen(command) { |io| io.read }
  end
end

.obj2digest(obj) ⇒ Object



404
405
406
407
408
409
410
411
412
# File 'lib/rbbt/util/misc/inspect.rb', line 404

# Digest of +obj+: its obj2str representation passed through digest, or ""
# when that representation is empty.
def self.obj2digest(obj)
  representation = obj2str(obj)
  return "" if representation.empty?

  digest(representation)
end

.obj2md5(obj) ⇒ Object



414
415
416
# File 'lib/rbbt/util/misc/inspect.rb', line 414

# Alternative name for obj2digest; returns exactly what obj2digest returns
# for +obj+.
def self.obj2md5(obj)
  summary = obj2digest(obj)
  summary
end

.obj2str(obj) ⇒ Object



296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# File 'lib/rbbt/util/misc/inspect.rb', line 296

# Builds a string representation of +obj+ by dispatching on its type
# (obj2digest feeds this string to digest).
#
# Containers are rendered recursively; long strings and arrays are sampled;
# files are described by path plus modification time. When the original
# object is Annotated (and not an AssociationItem) the representation of its
# purged info is appended after an underscore.
#
# Returns a String.
def self.obj2str(obj)
  # Keep the original around: the annotation info is appended at the end.
  _obj = obj
  obj = Annotated.purge(obj) if Annotated === obj

  str = case obj
        when nil
          'nil'
        when Numeric
          # Floats with no fractional part are rendered like integers (1.0 => "1").
          Float === obj && obj % 1 == 0 ? obj.to_i.to_s : obj.to_s
        when Symbol 
          obj.to_s
        when TrueClass
          'true'
        when FalseClass
          'false'
        when Hash
          "{"<< obj.collect{|k,v| obj2str(k) + '=>' << obj2str(v)}*"," << "}"
        when (defined?(Path) and Path)
          # Paths: a step (has an info file), a file under a step, or a plain
          # file/directory described by its contents or mtime.
          if defined?(Step) && Open.exists?(Step.info_file(obj))
            obj2str(Workflow.load_step(obj))
          elsif step_file?(obj)
            "Step file: " + obj
          else
            if obj.exists?
              if obj.directory?
                files = obj.glob("**/*")
                "directory: #{Misc.fingerprint(files)}"
              else
                "file: " << Open.realpath(obj) << "--" << mtime_str(obj)
              end
            else
              obj + " (file missing)"
            end
          end
        when String
          # Strings that look like file names starting with "." or "/" are
          # re-processed as Path objects (on a copy, so the argument is not extended).
          good_filename = Misc.is_filename?(obj, false) && ! %w(. ..).include?(obj) && %w(. /).include?(obj[0])
          if good_filename 
            obj = obj.dup
            obj.extend Path
            obj2str obj
          else
            obj = obj.chomp if String === obj
            # Long strings are replaced by a sample plus a digest of the full text.
            if obj.length > HASH2MD5_MAX_STRING_LENGTH
              sample_large_obj(obj, HASH2MD5_MAX_STRING_LENGTH) << "--" << txt_digest_str(obj)
            else
              obj
            end
          end
        when Array
          # Long arrays are sampled before rendering their elements.
          if obj.length > HASH2MD5_MAX_ARRAY_LENGTH
            "[" << sample_large_obj(obj, HASH2MD5_MAX_ARRAY_LENGTH).collect{|v| obj2str(v)} * "," << "]"
          else
            "[" << obj.collect{|v| obj2str(v) } * "," << "]"
          end
        when TSV::Parser
          remove_long_items(obj)
        when File 
          if obj.respond_to? :filename and obj.filename
            if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
              obj2str(Workflow.load_step(obj.filename))
            else
              "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
            end
          else
            "<IO:" << obj.path << "--" << mtime_str(obj.path) << ">"
          end
        when (defined? Step and Step)
          "<IO:" << obj.short_path_real << ">"
        when IO
          if obj.respond_to? :filename and obj.filename
            if defined?(Step) && Open.exists?(Step.info_file(obj.filename))
              obj2str(Workflow.load_step(obj.filename))
            else
              "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
            end
          else

            # Anonymous IO: the first call stores inspect plus a random number
            # on the object itself; later calls reuse that stored value.
            if obj.respond_to? :obj2str
              obj.obj2str
            else
              class << obj
                attr_accessor :obj2str
              end
              obj.obj2str = obj.inspect + rand(1000000).to_s
            end
          end
        else
          if obj.respond_to? :filename and obj.filename
            "<IO:" << obj.filename << "--" << mtime_str(obj.filename) << ">"
          else
            # Fall back to inspect, stripping ":0x..." object addresses when present.
            obj_ins = obj.inspect
            obj_str = if obj_ins =~ /:0x0/
              obj_ins.gsub(/:0x[a-f0-9]+/,'')
            else
              obj_ins
            end
          end
        end

  # Annotated objects also contribute their (purged) annotation info.
  if defined? Annotated and Annotated === _obj and not (defined? AssociationItem and AssociationItem === _obj)
    info = Annotated.purge(_obj.info)
    str << "_" << obj2str(info) 
  end

  str
end

.object_delta(*args) ⇒ Object



276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/rbbt/util/misc/development.rb', line 276

# Runs the given block and logs which objects (as selected by the arguments
# forwarded to ObjectSpace.each_object) exist after it that did not exist
# before it. A GC run precedes each snapshot, and the whole measurement runs
# inside MUTEX_FOR_THREAD_EXCLUSIVE.
#
# Returns the value of the block.
def self.object_delta(*args)
  result = nil
  created = nil

  MUTEX_FOR_THREAD_EXCLUSIVE.synchronize do
    before = Set.new
    created = Set.new

    GC.start
    ObjectSpace.each_object(*args) { |object| before.add object }

    result = yield

    GC.start
    ObjectSpace.each_object(*args) { |object| created.add object unless before.include? object }
  end

  Log.info "Delta: #{created.inspect}"
  result
end

.open_gz_pipe ⇒ Object



784
785
786
787
788
789
790
791
# File 'lib/rbbt/util/misc/pipes.rb', line 784

# Opens a pipe through Misc.open_pipe, yields its write end to the caller's
# block (closing it afterwards) and returns the read end passed through
# Open.gzip.
def self.open_gz_pipe
  plain = Misc.open_pipe do |writer|
    yield writer
    writer.close
  end

  Open.gzip(plain)
end

.open_pipe(do_fork = false, close = true) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/rbbt/util/misc/pipes.rb', line 67

# Creates a pipe, runs the given block with the write end (+sin+) and returns
# the read end (+sout+).
#
# do_fork - when true the block runs in a forked child process; otherwise it
#           runs in a new thread of the current process.
# close   - when true the write end is closed once the block finishes.
#
# Raises a RuntimeError ("No block given") when called without a block.
def self.open_pipe(do_fork = false, close = true)
  raise "No block given" unless block_given?

  sout, sin = Misc.pipe

  if do_fork

    #parent_pid = Process.pid
    pid = Process.fork {
      # Child: close the other registered pipe write ends (keeping ours) and
      # the read end, which belongs to the parent.
      purge_pipes(sin)
      sout.close
      begin

        yield sin
        sin.close if close and not sin.closed? 

      rescue Exception
        Log.exception $!
        #Process.kill :INT, parent_pid
        # exit! leaves the child immediately, without running at_exit handlers.
        Kernel.exit! -1
      end
      Kernel.exit! 0
    }
    # Parent: only the child writes, so close our copy of the write end.
    sin.close

    ConcurrentStream.setup sout, :pids => [pid]
  else

    # Each end is registered as the other's pair.
    ConcurrentStream.setup sin, :pair => sout
    ConcurrentStream.setup sout, :pair => sin

    thread = Thread.new do 
      begin
        
        yield sin

        sin.close if close and not sin.closed? and not sin.aborted?

      rescue Aborted
        Log.medium "Aborted open_pipe: #{$!.message}"
        raise $!
      rescue Exception
        Log.medium "Exception in open_pipe: #{$!.message}"
        Log.exception $!
        begin
          # Hand the failure to the stream before re-raising it in this thread.
          sin.raise($!) if sin.respond_to? :raise
          sin.join if sin.respond_to? :join
        ensure
          raise $!
        end
      end
    end

    sin.threads = [thread]
    sout.threads = [thread]
  end

  sout
end

.ordered_divide(array, num) ⇒ Object

Divides the array into consecutive chunks of at most num elements each, preserving the original order; only the last chunk may be shorter.



221
222
223
224
225
226
227
228
229
230
231
# File 'lib/rbbt/util/misc/development.rb', line 221

# Splits +array+ into consecutive chunks of at most +num+ elements, keeping
# the original order. Only the last chunk may be shorter.
#
# array - object responding to +length+ and +[start, length]+ (e.g. an Array).
# num   - chunk size; must be positive.
#
# Returns an Array of chunks (empty when +array+ is empty).
# Raises ArgumentError when +num+ is not positive; previously such a value made
# the loop append empty chunks forever.
def self.ordered_divide(array, num)
  raise ArgumentError, "chunk size must be positive (got #{num.inspect})" unless num > 0

  (0...array.length).step(num).collect { |start| array[start, num] }
end

.parse_cmd_params(str) ⇒ Object



3
4
5
6
7
8
9
# File 'lib/rbbt/util/misc/options.rb', line 3

# Splits a command-line string into its parameters.
#
# Quoted segments (single or double quotes) become one parameter without the
# quotes; everything else is split on whitespace. An Array argument is
# returned as is.
def self.parse_cmd_params(str)
  return str if Array === str

  tokens = str.scan(/
           (?:["']([^"']*?)["']) |
           ([^"'\s]+)
  /x)

  # Each match fills exactly one of the two capture groups.
  tokens.collect { |quoted, bare| quoted.nil? ? bare : quoted }
end

.parse_sql_values(txt) ⇒ Object



187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/rbbt/util/misc/format.rb', line 187

# Parses the VALUES part of an SQL statement, e.g. "(1,'a'),(2,'b');", into
# an Array with one Array of field Strings per parenthesised tuple.
#
# Single quotes delimit text in which commas and parentheses are literal; the
# quotes themselves are dropped. Parsing stops at the first unquoted ";".
# Characters outside of any tuple (such as whitespace between tuples) are
# ignored; previously they raised NoMethodError on nil.
#
# Returns an Array of Arrays of Strings.
def self.parse_sql_values(txt)
  values = []
  fields = []
  current = nil # nil while we are outside of a "(...)" tuple
  quoted = false

  txt.strip.each_char do |c|
    if quoted
      if c == "'"
        quoted = false
      elsif current
        current << c
      end
      next
    end

    case c
    when "("
      current = "".dup
    when ")"
      fields << current
      values << fields
      fields = []
      current = nil
    when ','
      # A comma between tuples (current is nil) separates nothing.
      if not current.nil?
        fields << current
        current = "".dup
      end
    when "'"
      quoted = true
    when ";"
      break
    else
      current << c if current
    end
  end

  values
end

.paste_streams(streams, lines = nil, sep = "\t", header = nil, &block) ⇒ Object



636
637
638
639
640
641
642
# File 'lib/rbbt/util/misc/pipes.rb', line 636

# Opens a pipe with Misc.open_pipe and delegates the actual work to
# _paste_streams, which receives the streams, the write end of the pipe, and
# the lines/sep/header arguments plus the optional block.
#
# sep defaults to a tab, also when nil is passed explicitly.
#
# Returns the read end of the pipe (the value of Misc.open_pipe).
def self.paste_streams(streams, lines = nil, sep = "\t", header = nil, &block)
  sep ||= "\t"
  # NOTE(review): num_streams is never used afterwards.
  num_streams = streams.length
  Misc.open_pipe do |sin|
    self._paste_streams(streams, sin, lines, sep, header, &block)
  end
end

.path_relative_to(basedir, path) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/rbbt/util/misc/system.rb', line 41

# Expresses +path+ relative to +basedir+.
#
# Both arguments are expanded with File.expand_path unless they already start
# with "/". The match must end on a path-separator boundary, so "/a/bc" is
# NOT considered to be inside "/a/b" (the old prefix check returned a bogus
# remainder in that case).
#
# Returns the relative part as a String, or nil when +path+ is not under
# +basedir+ (or equals a +basedir+ given without a trailing "/").
def self.path_relative_to(basedir, path)
  path = File.expand_path(path) unless path[0] == "/"
  basedir = File.expand_path(basedir) unless basedir[0] == "/"

  return nil unless path.start_with?(basedir)

  remainder = path[basedir.length..-1]

  # A basedir that already ends in "/" guarantees the boundary.
  return remainder if basedir[-1] == "/"
  return nil if remainder.empty?
  return nil unless remainder[0] == "/"

  remainder[1..-1]
end

.pid_exists?(pid) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
15
16
17
# File 'lib/rbbt/util/misc/system.rb', line 9

# Tells whether a process with the given pid exists.
#
# pid - Integer or anything responding to to_i; nil yields false.
#
# Uses Process.getpgid as the probe: Errno::ESRCH means there is no such
# process.
def self.pid_exists?(pid)
  return false if pid.nil?

  Process.getpgid(pid.to_i)
  true
rescue Errno::ESRCH
  false
end

.pipe ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/rbbt/util/misc/pipes.rb', line 26

# Creates an IO.pipe and registers its write end in OPEN_PIPE_IN (after
# dropping already-closed entries from that list).
#
# Returns [read_end, write_end].
def self.pipe
  OPEN_PIPE_IN.delete_if(&:closed?)

  ends = PIPE_MUTEX.synchronize do
    reader, writer = IO.pipe
    OPEN_PIPE_IN << writer
    [reader, writer]
  end

  Log.debug { "Creating pipe #{[ends.last.inspect, ends.first.inspect] * " => "}" }
  ends
end

.positional2hash(keys, *values) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/rbbt/util/misc/options.rb', line 11

# Pairs positional +values+ with +keys+ through Misc.zip2hash.
#
# When the last value is a Hash it is treated as extra named inputs: nil and
# empty-String positional values are dropped, the extras are applied with
# Misc.add_defaults, and finally any entry whose key is not listed in +keys+
# (as given, or in its String/Symbol counterpart) is removed.
def self.positional2hash(keys, *values)
  return Misc.zip2hash(keys, values) unless Hash === values.last

  extra = values.pop
  inputs = Misc.zip2hash(keys, values)
  inputs.delete_if { |_key, value| value.nil? || (String === value && value.empty?) }
  inputs = Misc.add_defaults inputs, extra
  inputs.delete_if do |key, _value|
    next false if keys.include?(key)

    !keys.include?(Symbol === key ? key.to_s : key.to_sym)
  end
  inputs
end

.pre_fork ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/development.rb', line 8

# Housekeeping done before forking: closes every Persist connection that is
# open for writing, clears the registered progress bars and tries to unlock
# every locked Mutex found in ObjectSpace.
def self.pre_fork
  Persist::CONNECTIONS.values.each { |db| db.close if db.write? }
  Log::ProgressBar::BARS.clear

  ObjectSpace.each_object(Mutex) do |mutex|
    next unless mutex.locked?

    begin
      mutex.unlock
    rescue ThreadError
      # Mutex#unlock raises ThreadError when the current thread does not hold
      # the lock; such mutexes are left as they are.
    end
  end
end

.prepare_entity(entity, field, options = {}) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/rbbt/util/misc/objects.rb', line 16

# Sets up +entity+ (a String or an Array) as an Entity of the kind designated
# by +field+.
#
# entity  - value to set up; anything that is not a String or Array is
#           returned untouched, as is everything when Entity is not defined.
# field   - either an Entity module or a name looked up in Entity.formats.
# options - parameters forwarded to the module's setup; :dup_array is removed
#           from this hash and controls whether Array elements are duplicated.
#
# Returns the set-up copy, or +entity+ itself when +field+ matches nothing.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity or Array === entity
  options ||= {}

  # NOTE: this deletes the key from the caller's hash.
  dup_array = options.delete :dup_array

  if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
    params = options.dup

    # Accept the format under a String key too.
    params[:format] ||= params.delete "format"
    # Use the format found for the field unless a non-empty :format was given.
    params.merge!(:format => _format) unless _format.nil? or (params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?)))

    mod = Entity === field ? field : Entity.formats[field]

    # Work on a copy so the caller's object is not the one being set up.
    entity = entity.dup
    entity = (entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) 

    entity = mod.setup(entity, params)
  end

  entity
end

.process_options(hash, *keys) ⇒ Object



142
143
144
145
146
147
148
149
150
151
# File 'lib/rbbt/util/misc/options.rb', line 142

# Extracts (and removes) the given option keys from +hash+.
#
# hash - options Hash; extracted keys are deleted from it.
# keys - key names; each is looked up as a Symbol first and as a String
#        otherwise. A trailing Hash is taken as defaults and handed to
#        Misc.add_defaults before extraction.
#
# Returns the single value when one key is requested, otherwise an Array of
# values in the order of +keys+ (nil for absent keys).
def self.process_options(hash, *keys)
  defaults = keys.pop if Hash === keys.last

  # The result is deliberately not assigned: extraction below must operate on
  # the caller's hash. (The original stored it in a misspelled, unused local.)
  # NOTE(review): defaults only take effect if add_defaults updates +hash+ in
  # place -- confirm against its implementation.
  Misc.add_defaults hash, defaults if defaults

  extract = lambda do |key|
    hash.include?(key.to_sym) ? hash.delete(key.to_sym) : hash.delete(key.to_s)
  end

  if keys.length == 1
    extract.call(keys.first)
  else
    keys.collect { |key| extract.call(key) }
  end
end

.process_stream(s) ⇒ Object



449
450
451
452
453
454
455
456
457
# File 'lib/rbbt/util/misc/pipes.rb', line 449

# Yields the stream +s+ to the block and then joins it (when it responds to
# join). If anything raises a StandardError the stream is aborted (when it
# responds to abort) and the error is re-raised.
def self.process_stream(s)
  yield s
  s.join if s.respond_to? :join
rescue
  s.abort if s.respond_to? :abort
  raise
end

.process_to_hash(list) ⇒ Object



41
42
43
44
# File 'lib/rbbt/util/misc/options.rb', line 41

# Yields the whole +list+ to the block and pairs the list with the block's
# result through zip2hash.
# NOTE(review): presumably the block returns one value per list element, in
# order -- confirm against zip2hash.
def self.process_to_hash(list)
  result = yield list
  zip2hash(list, result)
end

.profile(options = {}) ⇒ Object



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/rbbt/util/misc/development.rb', line 93

# Profiles the given block with ruby-prof and prints a flat report to STDOUT.
#
# options - passed to the printer's print call.
#
# The report is printed even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    RubyProf::FlatPrinter.new(report).print(STDOUT, options)
  end
end

.profile_graph(options = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/rbbt/util/misc/development.rb', line 75

# Profiles the given block with ruby-prof and prints a graph report to STDOUT.
#
# options - passed to the printer's print call.
#
# The report is printed even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile_graph(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    #report.eliminate_methods!([/annotated_array_clean_/])
    RubyProf::GraphPrinter.new(report).print(STDOUT, options)
  end
end

.profile_html(options = {}) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/development.rb', line 54

# Profiles the given block with ruby-prof, writes a MultiPrinter report into a
# temporary directory and opens that directory with firefox.
#
# options - accepted for symmetry with the other profile helpers; not used.
#
# The report is produced even when the block raises; in that case
# "Profiling aborted" is printed first and the exception is re-raised.
# Returns the value of the block.
def self.profile_html(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    printer = RubyProf::MultiPrinter.new(report)
    TmpFile.with_file do |dir|
      FileUtils.mkdir_p dir unless File.exist? dir
      printer.print(:path => dir, :profile => 'profile')
      CMD.cmd("firefox  -no-remote  '#{ dir }'")
    end
  end
end

.proportions(array) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rbbt/util/misc/math.rb', line 88

# Computes the fraction of +array+ taken up by each distinct element.
#
# Returns a Hash (default value 0) mapping element => proportion (Float). The
# hash gets its own +to_s+ listing "proportion<TAB>element" lines sorted by
# proportion and then by element.
def self.proportions(array)
  total = array.length

  proportions = Hash.new 0

  array.each do |e|
    proportions[e] += 1.0 / total
  end

  # The values are floats in [0, 1]; the previous "%3d" format truncated every
  # one of them to 0 (or 1), making the listing useless.
  proportions.define_singleton_method(:to_s) do
    sort { |a, b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1] }.
      collect { |k, c| "%.3f\t%s" % [c, k] } * "\n"
  end

  proportions
end

.pull_keys(hash, prefix) ⇒ Object



153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/rbbt/util/misc/options.rb', line 153

# Moves the options that belong to +prefix+ out of +hash+ into a new Hash.
#
# A key belongs to the prefix when it is exactly the prefix (kept under the
# same key) or starts with "<prefix>_" (stored with that part stripped,
# keeping the String/Symbol type of the key). Matching keys are deleted from
# +hash+.
#
# The match is anchored at the start of the key: previously prefix :lock also
# captured unrelated keys such as :block_size, and the prefix was used as a
# raw regular expression.
#
# Returns the new Hash.
def self.pull_keys(hash, prefix)
  pulled = {}
  prefix_name = prefix.to_s
  marker = prefix_name + "_"

  hash.keys.each do |key|
    name = key.to_s

    if name.start_with?(marker)
      stripped = name[marker.length..-1]
      case key
      when String
        pulled[stripped] = hash.delete key
      when Symbol
        pulled[stripped.to_sym] = hash.delete key
      end
    elsif name == prefix_name
      pulled[key] = hash.delete key
    end
  end

  pulled
end

.purge_pipes(*save) ⇒ Object



58
59
60
61
62
63
64
65
# File 'lib/rbbt/util/misc/pipes.rb', line 58

# Closes every pipe registered in OPEN_PIPE_IN that is still open, except the
# ones passed in +save+. Runs under PIPE_MUTEX.
def self.purge_pipes(*save)
  PIPE_MUTEX.synchronize do
    OPEN_PIPE_IN.each do |pipe|
      pipe.close unless save.include?(pipe) || pipe.closed?
    end
  end
end

.random_sample_in_range(total, size) ⇒ Object



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/rbbt/util/misc/development.rb', line 233

# Draws +size+ distinct random integers from 0...total.
#
# For large samples (more than a tenth of the range) positions are drawn from
# a shrinking pool, filling the hole left by each pick with the pool's last
# element. For small samples random positions are re-drawn until an unused
# one comes up.
#
# Returns a Set of Integers.
def self.random_sample_in_range(total, size)
  chosen = Set.new

  if size > total / 10
    pool = (0..total - 1).to_a
    size.times do |drawn|
      idx = (rand * (total - drawn)).floor
      if idx == pool.length - 1
        picked = pool.pop
      else
        picked = pool[idx]
        filler = pool.pop
        pool[idx] = filler
      end
      chosen << picked
    end
  else
    size.times do
      candidate = (rand * total).floor
      candidate = (rand * total).floor while chosen.include?(candidate)
      chosen << candidate
    end
  end

  chosen
end

.read_full_stream(io) ⇒ Object



277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/rbbt/util/misc/pipes.rb', line 277

# Reads +io+ to the end in BLOCK_SIZE chunks and returns everything read.
#
# After reading, the stream is joined when it responds to join. Any
# StandardError is swallowed: the stream is aborted (when it responds to
# abort) and whatever was read so far is returned.
def self.read_full_stream(io)
  buffer = ""
  begin
    while (chunk = io.read(BLOCK_SIZE))
      buffer << chunk
    end
    io.join if io.respond_to? :join
  rescue
    io.abort if io.respond_to? :abort
  end
  buffer
end

.read_stream(stream, size) ⇒ Object



353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/rbbt/util/misc/pipes.rb', line 353

# Reads exactly +size+ bytes from +stream+, waiting for data as needed.
#
# Returns the String read.
# Raises ClosedStream when the stream reaches end-of-file before +size+ bytes
# could be read.
def self.read_stream(stream, size)
  str = nil
  # Wait (in one-second select timeouts) until the stream becomes readable.
  Thread.pass while IO.select([stream],nil,nil,1).nil?
  # Keep trying until a first chunk arrives; eof at this point is an error.
  while not str = stream.read(size)
    IO.select([stream],nil,nil,1) 
    Thread.pass
    raise ClosedStream if stream.eof?
  end

  # Top up with further reads until the requested size is reached.
  while str.length < size
    raise ClosedStream if stream.eof?
    IO.select([stream],nil,nil,1)
    if new = stream.read(size-str.length)
      str << new
    end
  end
  str
end


70
71
72
73
74
75
76
77
78
79
80
# File 'lib/rbbt/util/misc/system.rb', line 70

# Builds a relative path that leads from +target_dir+ to +source+.
#
# Starting at +target_dir+, it climbs one directory at a time (adding ".." to
# the result) until Misc.common_path accepts the current directory together
# with +source+; the rest is obtained from Misc.path_relative_to.
#
# Returns the relative path, or nil when the climb reaches "/".
def self.relative_link(source, target_dir)
  climb = "."
  ancestor = target_dir

  until Misc.common_path(ancestor, source)
    ancestor = File.dirname(ancestor)
    climb = File.join(climb, '..')
    return nil if ancestor == "/"
  end

  File.join(climb, Misc.path_relative_to(ancestor, source))
end

.release_pipes(*pipes) ⇒ Object



50
51
52
53
54
55
56
# File 'lib/rbbt/util/misc/pipes.rb', line 50

# Closes the given pipes (nested arrays are flattened), skipping those that
# are already closed. Runs under PIPE_MUTEX.
def self.release_pipes(*pipes)
  PIPE_MUTEX.synchronize do
    pipes.flatten.each { |pipe| pipe.close unless pipe.closed? }
  end
end

.remove_lines(stream1, stream2, sort) ⇒ Object



709
710
711
# File 'lib/rbbt/util/misc/pipes.rb', line 709

# Delegates to compare_lines with the '-2 -3' arguments.
# NOTE(review): presumably these are comm(1)-style flags that keep only the
# lines unique to stream1 -- confirm against compare_lines.
def self.remove_lines(stream1, stream2, sort)
  self.compare_lines(stream1, stream2, '-2 -3', sort)
end

.remove_long_items(obj) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/rbbt/util/misc/inspect.rb', line 117

# Produces a shortened version of +obj+ suitable for display or
# fingerprinting.
#
# IO and path-like objects become short descriptive strings; TSV objects are
# cut down to TSV_MAX_ROWS rows and TSV_MAX_FIELDS fields; Arrays, Hashes and
# Strings over their limits are truncated with a marker; smaller Hashes and
# Arrays are processed element by element. Anything else is returned as is.
def self.remove_long_items(obj)
  case
  when IO === obj
    remove_long_items("IO: " + (obj.respond_to?(:filename) ? (obj.filename || obj.inspect) : obj.inspect ))
  when obj.respond_to?(:path)
    remove_long_items("File: " + obj.path)
  when TSV::Parser === obj
    filename = obj.filename
    # STDIN has no stable name, so a random tag is used instead.
    filename = "STDIN(rand-#{rand(10000000)})" if filename == '-'
    remove_long_items("TSV Stream: " + filename + " -- " << Misc.fingerprint(obj.options))
  when TSV === obj
    tsv = obj
    fields = tsv.fields

    if obj.size > TSV_MAX_ROWS
      tsv = obj.head(TSV_MAX_ROWS)
      tsv["Truncated rows at #{TSV_MAX_ROWS} (#{obj.size})"] = nil
    end

    if fields && fields.length > TSV_MAX_FIELDS
      # Field truncation must use the field limit; the row limit was used here
      # before, both for the slice and for the marker text.
      tsv = obj.slice(fields[0..TSV_MAX_FIELDS-1])
      tsv.add_field "Truncated at #{TSV_MAX_FIELDS} (#{fields.length})" do
        nil
      end
    elsif fields.nil?
      new = tsv.annotate({})
      tsv.each do |k,v|
        new[k] = Misc.remove_long_items(v)
      end
      tsv = new
    end

    tsv
  when (Array === obj and obj.length > ARRAY_MAX_LENGTH)
    remove_long_items(obj[0..ARRAY_MAX_LENGTH-2] << "TRUNCATED at #{ ARRAY_MAX_LENGTH }/#{obj.length}")
  when (Hash === obj and obj.length > ARRAY_MAX_LENGTH)
    # NOTE(review): this turns the hash into an array of pairs, so the result
    # is an Array rather than a Hash.
    remove_long_items(obj.collect.compact[0..ARRAY_MAX_LENGTH-2] << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH }/#{obj.length}"])
  when (String === obj and obj.length > STRING_MAX_LENGTH)
    obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH}/#{obj.length}"
  when Hash === obj
    new = {}
    obj.each do |k,v|
      new[k] = remove_long_items(v)
    end
    new
  when Array === obj
    obj.collect do |e| remove_long_items(e) end
  else
    obj
  end
end

.remove_quoted_new_line(stream, quote = '"') ⇒ Object



757
758
759
# File 'lib/rbbt/util/misc/pipes.rb', line 757

# Delegates to swap_quoted_character, asking it to swap "\n" for " " using
# +quote+ as the quote character.
# NOTE(review): presumably only newlines inside quoted sections are
# replaced -- confirm against swap_quoted_character.
def self.remove_quoted_new_line(stream, quote = '"')
  swap_quoted_character(stream, "\n", " ", quote)
end

.reset_do_once ⇒ Object



133
134
135
# File 'lib/rbbt/util/misc/development.rb', line 133

# Resets the global $__did_once flag to false.
# NOTE(review): presumably this re-arms a companion "do once" helper defined
# elsewhere -- confirm.
def self.reset_do_once
  $__did_once = false
end

.sample(ary, size, replacement = false) ⇒ Object



264
265
266
267
268
269
270
271
272
# File 'lib/rbbt/util/misc/development.rb', line 264

# Picks +size+ random elements from +ary+.
#
# Uses the collection's own +sample+ when it has one; otherwise picks the
# positions with random_sample_in_range and fetches them with values_at.
# +replacement+ is accepted but not used.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to? :sample

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end

.sample_large_obj(obj, max = 100) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
# File 'lib/rbbt/util/misc/inspect.rb', line 173

# Builds a compact sample of a long Array or String: its beginning, nine
# three-element probes spread over its length, its end and its total length.
#
# obj - Array or String (expected to be longer than +max+).
# max - size budget that determines how much of each end is kept.
#
# Returns an Array for Array input (ending in a "LENGTH: n" entry) and a
# String otherwise (ending in "(n)").
def self.sample_large_obj(obj, max = 100)
  size = obj.length
  stride = size / 10

  leading = obj[0..max/2]
  trailing = obj[-max/2..-1]
  probes = (1..9).collect do |i|
    center = stride * i + i
    obj[center-1..center+1]
  end.flatten

  return leading + probes + trailing + ["LENGTH: #{obj.length}"] if Array === obj

  leading << "..." << probes*"," << "..." << trailing << "(#{obj.length})"
end

.sanitize_filename(filename, length = 254) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/rbbt/util/misc/inspect.rb', line 28

# Shortens +filename+ so that it fits in +length+ characters.
#
# Names within the limit are returned unchanged. Longer names are cut and
# given a suffix made of the original length, the limit, the first five
# characters of Misc.digest(filename) and the original extension (a final
# "." followed by 2 to 9 characters), if any.
def self.sanitize_filename(filename, length = 254)
  return filename unless filename.length > length

  extension = (filename =~ /(\..{2,9})$/) ? $1 : ''
  post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}" + extension

  filename[0..(length - post_fix.length - 1)] << post_fix
end

.save_stream(file, stream) ⇒ Object



644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
# File 'lib/rbbt/util/misc/pipes.rb', line 644

# Duplicates +stream+ with Misc.tee_stream: one copy is written to +file+ in a
# background thread through Misc.sensiblewrite, the other is returned to the
# caller. Both copies get +file+ as their filename.
#
# Returns the copy meant for the caller.
def self.save_stream(file, stream)
  save, out = Misc.tee_stream stream
  out.filename = file
  save.filename = file

  # The writer thread receives the current thread as +parent+ (not used in
  # the body).
  Thread.new(Thread.current) do |parent|
    begin
      Misc.sensiblewrite(file, save)
    rescue Exception
      # On failure abort both the saved copy and the source stream, then
      # re-raise inside this thread.
      save.abort if save.respond_to? :abort
      stream.abort if stream.respond_to? :abort
      stream.join
      Log.medium "Exception in save_stream: #{$!.message}"
      raise $!
    end
  end

  out
end

.scan_version_text(text, cmd = nil) ⇒ Object



443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
# File 'lib/rbbt/util/misc/inspect.rb', line 443

# Tries to find a version number in +text+ (for instance the output of a
# command's --version flag).
#
# text - String to scan.
# cmd  - name of the command; lines mentioning it are examined first. A nil
#        or empty value is replaced by a placeholder.
#
# Lines that mention the command as a separate word are checked for, in
# order: a "v"-prefixed version, a "v"-suffixed version, and (only when the
# line also contains the word "version") a bare version. Failing that, the
# whole text is searched with two looser patterns.
#
# Returns the version String or nil.
def self.scan_version_text(text, cmd = nil)
  cmd = "NOCMDGIVE" if cmd.nil? || cmd.empty?

  text.split("\n").each do |line|
    next unless line =~ /\W#{cmd}\W/i

    prefixed = line.match(/(v(?:\d+\.)*\d+(?:-[a-z_]+)?)/i)
    return prefixed[1] if prefixed

    suffixed = line.match(/((?:\d+\.)*\d+(?:-[a-z_]+)?v)/i)
    return suffixed[1] if suffixed

    next unless line =~ /\Wversion\W/i

    bare = line.match(/((?:\d+\.)*\d+(?:-[a-z_]+)?)/i)
    return bare[1] if bare
  end

  anywhere = text.match(/(?:version.*?|#{cmd}.*?|#{cmd.to_s.split(/[-_.]/).first}.*?|v)((?:\d+\.)*\d+(?:-[a-z_]+)?)/i)
  return anywhere[1] if anywhere

  loose = text.match(/(?:#{cmd}.*(v.*|.*v))/i)
  return loose[1] if loose

  nil
end

.sd(list) ⇒ Object



72
73
74
75
76
# File 'lib/rbbt/util/misc/math.rb', line 72

# Standard deviation of +list+: the square root of self.variance(list).
#
# Returns nil for lists with fewer than three elements.
def self.sd(list)
  return nil if list.length < 3

  Math.sqrt(self.variance(list))
end

.select_lines(stream1, stream2, sort) ⇒ Object



713
714
715
# File 'lib/rbbt/util/misc/pipes.rb', line 713

# Delegates to compare_lines with the '-1 -2' arguments.
# NOTE(review): presumably these are comm(1)-style flags that keep only the
# lines present in both streams -- confirm against compare_lines.
def self.select_lines(stream1, stream2, sort)
  self.compare_lines(stream1, stream2, '-1 -2', sort)
end

.select_ranges(stream1, stream2, sep = "\t") ⇒ Object



473
474
475
476
477
# File 'lib/rbbt/util/misc/omics.rb', line 473

# Opens a pipe with Misc.open_pipe and lets intersect_streams write into it
# the result of combining +stream1+ and +stream2+ (fields separated by
# +sep+).
#
# Returns the read end of the pipe.
def self.select_ranges(stream1, stream2, sep = "\t")
  Misc.open_pipe do |sin|
    intersect_streams(stream1, stream2,sin, sep)
  end
end

.send_email(from, to, subject, message, options = {}) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/rbbt/util/misc/communication.rb', line 48

# Sends an e-mail through SMTP using the mail gem.
#
# from, to - addresses.
# subject  - subject line.
# message  - text body.
# options  - :server (default 'localhost'), :port (25), :user, :pass,
#            :from_alias, :to_alias, :auth (:login) and :files, a file or
#            list of files to attach (Path and Step objects are resolved to
#            file names).
#
# Returns the value of Mail.deliver.
def self.send_email(from, to, subject, message, options = {})
  require 'mail'

  IndiferentHash.setup(options)
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login, :files => []

  # NOTE(review): auth is extracted but not passed on to the delivery settings.
  server, port, user, pass, from_alias, to_alias, auth, files = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth, :files

  files = [] if files.nil?
  files = [files] unless Array === files

  Mail.defaults do
    delivery_method :smtp, address: server, port: port, user_name: user, password: pass
  end

  Mail.deliver do 
    from  "#{from_alias} <#{from}>"
    to "#{to_alias} <#{to}>"
    subject subject

    text_part do 
      body message
    end

    files.each do |file|
      file = file.find if Path === file
      file = file.path if Step === file
      # Attach the resolved file; the loop used to resolve it and then drop
      # it, so the :files option had no effect.
      add_file file
    end
  end
end

.send_email_old(from, to, subject, message, options = {}) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/rbbt/util/misc/communication.rb', line 29

# Sends a plain-text e-mail with Net::SMTP, building the message by hand.
#
# from, to - addresses.
# subject  - subject line.
# message  - text body.
# options  - :server (default 'localhost'), :port (25), :user, :pass,
#            :from_alias, :to_alias and :auth (:login).
def self.send_email_old(from, to, subject, message, options = {})
  IndiferentHash.setup(options)
  options = Misc.add_defaults options, :from_alias => nil, :to_alias => nil, :server => 'localhost', :port => 25, :user => nil, :pass => nil, :auth => :login

  server, port, user, pass, from_alias, to_alias, auth = Misc.process_options options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth

  # Raw message: headers, a blank line, then the body.
  msg = <<-END_OF_MESSAGE
From: #{from_alias} <#{from}>
To: #{to_alias} <#{to}>
Subject: #{subject}

#{message}
END_OF_MESSAGE

  # The server name is passed a second time as the third argument.
  Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
    smtp.send_message msg, from, to
  end
end

.sensiblewrite(path, content = nil, options = {}, &block) ⇒ Object



372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
# File 'lib/rbbt/util/misc/pipes.rb', line 372

# Writes +content+ to +path+ through a temporary file that is moved into
# place once complete, all under a lock.
#
# path    - destination file.
# content - String, IO/StringIO/File stream, or nil (an empty file is
#           created); ignored when a block is given.
# options - :force overwrites an existing file; :lock / lock_* keys are
#           handed to Misc.lock and Open.mv (:lock => false disables the
#           lock file path).
# block   - when given, receives the open temporary file to write into.
#
# If +path+ already exists and :force is not set, nothing is written and
# +content+ is only consumed.
def self.sensiblewrite(path, content = nil, options = {}, &block)
  force = Misc.process_options options, :force

  # Quick exit without taking the lock.
  if Open.exists? path and not force
    Misc.consume_stream content 
    return
  end

  lock_options = Misc.pull_keys options.dup, :lock
  lock_options = lock_options[:lock] if Hash === lock_options[:lock]
  tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
  tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})

  tmp_path_lock = nil if FalseClass === options[:lock]

  Misc.lock tmp_path_lock, lock_options do

    # Check again now that the lock is held: the file may have appeared while
    # waiting for it.
    if Open.exists? path and not force
      Log.warn "Path exists in sensiblewrite, not forcing update: #{ path }"
      Misc.consume_stream content 
    else
      FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
      FileUtils.rm_f tmp_path if File.exist? tmp_path
      begin

        # Fill the temporary file according to the kind of content.
        case
        when block_given?
          File.open(tmp_path, 'wb', &block)
        when String === content
          File.open(tmp_path, 'wb') do |f| f.write content end
        when (IO === content or StringIO === content or File === content)

          Open.write(tmp_path) do |f|
            f.sync = true
            while block = content.read(BLOCK_SIZE)
              f.write block
            end 
          end
        else
          File.open(tmp_path, 'wb') do |f|  end
        end

        # Move into place; a failed move is tolerated when the destination
        # exists anyway.
        begin
          Misc.insist do
            Open.mv tmp_path, path, lock_options
          end
        rescue Exception
          raise $! unless Open.exists? path
        end

        Open.touch path if Open.exists? path
        content.join if content.respond_to?(:join) and not Path === content and not (content.respond_to?(:joined?) && content.joined?)

        Open.notify_write(path) 
      rescue Aborted
        # Aborted: clean up without re-raising.
        Log.medium "Aborted sensiblewrite -- #{ Log.reset << Log.color(:blue, path) }"
        content.abort if content.respond_to? :abort
        Open.rm path if File.exist? path
      rescue Exception
        # Prefer the exception recorded on an aborted stream over the current one.
        exception = (AbortedStream === content and content.exception) ? content.exception : $!
        Log.medium "Exception in sensiblewrite: [#{Process.pid}] #{exception.message} -- #{ Log.color :blue, path }"
        content.abort if content.respond_to? :abort
        Open.rm path if File.exist? path
        raise exception
      rescue
        # NOTE(review): this clause looks unreachable, since the
        # `rescue Exception` above already covers StandardError.
        Log.exception $!
        raise $!
      ensure
        FileUtils.rm_f tmp_path if File.exist? tmp_path
        if Lockfile === lock_options[:lock] and lock_options[:lock].locked?
          lock_options[:lock].unlock
        end
      end
    end
  end
end

.snake_case(string) ⇒ Object



98
99
100
101
102
103
104
105
106
# File 'lib/rbbt/util/misc/format.rb', line 98

# Converts a CamelCase or spaced name into snake_case.
#
# Underscores are inserted at case boundaries, whitespace becomes "_" and any
# other non-word character is dropped. Parts containing two or more
# consecutive capitals (acronyms) keep their case; the rest is downcased.
#
# Accepts a String or Symbol; returns nil for nil.
def self.snake_case(string)
  return nil if string.nil?

  text = Symbol === string ? string.to_s : string
  text = text.gsub(/([A-Z]{2,})([A-Z][a-z])/,'\1_\2')
  text = text.gsub(/([a-z])([A-Z])/,'\1_\2')
  text = text.gsub(/\s/,'_')
  text = text.gsub(/[^\w_]/, '')

  text.split("_").collect { |part| part.match(/[A-Z]{2,}/) ? part : part.downcase } * "_"
end

.sort_genomic_locations(stream, sep = ":") ⇒ Object



346
347
348
# File 'lib/rbbt/util/misc/omics.rb', line 346

# Sorts a stream of "<chr><sep><position>..." lines with sort_stream: first
# field as text, second field numerically ("-k1,1 -k2,2n"), using +sep+ as
# the field separator and '#' as the header marker.
def self.sort_genomic_locations(stream, sep = ":")
  sort_stream(stream, '#', "-k1,1 -k2,2n -t#{sep}")
end

.sort_genomic_locations_by_contig(stream, contigs, sep = ":") ⇒ Object



330
331
332
333
334
335
336
337
338
339
340
# File 'lib/rbbt/util/misc/omics.rb', line 330

# Sorts genomic location lines following the contig order given in +contigs+.
#
# Each line is prefixed with the index of its contig (the text before the
# first +sep+) in +contigs+, the prefixed lines are sorted numerically by
# that index and by position with sort_stream, and the prefix is removed
# again.
#
# Returns the resulting stream.
def self.sort_genomic_locations_by_contig(stream, contigs, sep = ":")
  ranked = TSV.traverse stream, :type => :array, :into => :stream do |line|
    contig = line.partition(sep).first
    rank = contigs.index contig
    rank.to_s + sep + line
  end

  ordered = sort_stream(ranked, '#', "-k1,1n -k3,3n -t#{sep}")

  TSV.traverse ordered, :type => :array, :into => :stream do |line|
    line.partition(sep).last
  end
end

.sort_genomic_locations_strict(stream, sep = ":") ⇒ Object



342
343
344
# File 'lib/rbbt/util/misc/omics.rb', line 342

# Like sort_genomic_locations, but the first field is compared with sort's
# version ordering ("-k1,1V") instead of plain text ordering.
def self.sort_genomic_locations_strict(stream, sep = ":")
  sort_stream(stream, '#', "-k1,1V -k2,2n -t#{sep}")
end

.sort_mutation_stream(stream, sep = ":") ⇒ Object



731
732
733
# File 'lib/rbbt/util/misc/pipes.rb', line 731

# Sorts a stream of mutation lines through a shell pipeline: keeps lines
# containing +sep+, removes duplicates, rewrites a leading "M:" as "MT:" and
# sorts by the first field as text and the second and third numerically
# (LC_ALL=C).
#
# Returns the piped output of CMD.cmd (run with :no_fail).
def self.sort_mutation_stream(stream, sep=":")
  CMD.cmd("grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -k1,1 -k2,2n -k3,3n -t'#{sep}'", :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutation_stream_strict(stream, sep = ":") ⇒ Object



727
728
729
# File 'lib/rbbt/util/misc/pipes.rb', line 727

# Same pipeline as sort_mutation_stream, with sort's -V (version ordering)
# flag added to the final sort.
#
# Returns the piped output of CMD.cmd (run with :no_fail).
def self.sort_mutation_stream_strict(stream, sep=":")
  CMD.cmd("grep '#{sep}' | sort -u | sed 's/^M:/MT:/' | env LC_ALL=C sort -V -k1,1 -k2,2n -k3,3n -t'#{sep}'", :in => stream, :pipe => true, :no_fail => true)
end

.sort_mutations_strict(mutations) ⇒ Object Also known as: sort_mutations



290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/rbbt/util/misc/omics.rb', line 290

# Sorts mutation strings of the form "<chr>:<position>:<change>" by
# chromosome, then position (numerically), then change.
#
# A leading "chr" is ignored. Numbered chromosomes sort by number, followed
# by Y, X and MT/M, in that order. Y used to be mapped to 22, which made it
# tie with chromosome 22 and interleave with its positions; the three special
# chromosomes now get the ranks after 22 while keeping their relative order.
#
# Returns a new Array with the original strings in sorted order.
def self.sort_mutations_strict(mutations)
  keyed = mutations.collect do |mutation|
    chr, pos, mut = mutation.split ":"
    chr = chr.sub(/^chr/i, '')
    rank = case chr
           when "Y" then 23
           when "X" then 24
           when "MT", "M" then 25
           else chr.to_i
           end
    [rank, pos.to_i, mut, mutation]
  end

  # Compare on [rank, position, change]; the original string is only payload.
  keyed.sort { |a, b| a[0..2] <=> b[0..2] }.collect { |entry| entry.last }
end

.sort_stream(stream, header_hash = "#", cmd_args = "-u") ⇒ Object



459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
# File 'lib/rbbt/util/misc/pipes.rb', line 459

# Sorts the lines of +stream+ with the system sort command, leaving header
# lines in place.
#
# stream      - anything TSV.get_stream can turn into a stream.
# header_hash - prefix marking the leading header lines, which are copied
#               through unsorted.
# cmd_args    - arguments for sort (default "-u").
#
# Returns the read end of a pipe producing the header followed by the sorted
# lines.
def self.sort_stream(stream, header_hash = "#", cmd_args = "-u")
  Misc.open_pipe do |sin|
    stream = TSV.get_stream stream

    # Copy the leading header lines straight to the output.
    line = stream.gets
    while line =~ /^#{header_hash}/ do
      sin.puts line
      line = stream.gets
    end

    # Feed the first non-header line plus the rest of the stream to sort
    # through a second pipe.
    line_stream = Misc.open_pipe do |line_stream_in|
      line_stream_in.puts line
      begin
        Misc.consume_stream(stream, false, line_stream_in)
      rescue
        raise $!
      end
    end

    sorted = CMD.cmd("env LC_ALL=C sort #{cmd_args || ""}", :in => line_stream, :pipe => true)

    begin
      Misc.consume_stream(sorted, false, sin)
    rescue
      Log.exception $!
      begin
        # Let both streams know about the failure before re-raising.
        sorted.raise($!) if sorted.respond_to? :raise
        stream.raise($!) if stream.respond_to? :raise
      ensure
        raise $!
      end
    end
  end
end

.sorted_array_hits(a1, a2) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/rbbt/util/misc/manipulation.rb', line 36

# Finds which positions of one sorted array hold values present in another.
#
# Both arrays are walked in a single merge-style pass, so they must be sorted
# in ascending order. The walk stops at the end of either array or at the
# first nil element. The inputs are only read: they are no longer emptied
# with `shift` as a side effect.
#
# @param a1 [Array] sorted values whose positions are reported
# @param a2 [Array] sorted values to look for
# @return [Array<Integer>] indices into a1 of the elements also found in a2
# @raise [ArgumentError] if two elements cannot be compared (this used to
#   loop forever)
def self.sorted_array_hits(a1, a2)
  hits = []
  i = j = 0

  while i < a1.length && j < a2.length
    e1, e2 = a1[i], a2[j]
    break if e1.nil? || e2.nil?

    order = e1 <=> e2
    raise ArgumentError, "comparison of #{e1.class} with #{e2.class} failed" if order.nil?

    if order == 0
      hits << i
      i += 1
      j += 1
    elsif order < 0
      i += 1
    else
      j += 1
    end
  end

  hits
end

.ssh_run(server, script = nil, &block) ⇒ Object



420
421
422
423
424
# File 'lib/rbbt/util/misc/development.rb', line 420

# Feeds a script to `ruby` on a remote host over ssh and returns what the
# command produced.
#
# The remote side runs `ruby` inside a login bash with alias expansion
# enabled; the script is supplied as the command's input.
#
# @param server [String] ssh destination (interpolated into the command line)
# @param script [String, nil] Ruby source to run remotely
# @param block unused
# @return the result of reading the command output
def self.ssh_run(server, script = nil, &block)
  Log.debug "Run ssh script in #{server}:\n#{script}"

  remote_command = "ssh '#{server}' 'shopt -s expand_aliases; bash -l -c \"ruby\"' "
  output = CMD.cmd(remote_command, :in => script, :log => true)
  output.read
end

.std_num_vector(v, min, max) ⇒ Object



32
33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/math.rb', line 32

# Linearly rescales the values of a vector into the interval [min, max].
#
# The smallest and largest values of v (as given by Misc.min and Misc.max)
# are mapped to min and max respectively.
#
# @param v [Enumerable<Numeric>] values to rescale
# @param min [Numeric] lower bound of the target interval
# @param max [Numeric] upper bound of the target interval
# @return [Array] the rescaled values
def self.std_num_vector(v, min, max)
  lowest = Misc.min(v)
  highest = Misc.max(v)
  spread = highest - lowest
  target_span = max.to_f - min.to_f

  v.map do |value|
    offset = value.to_f - lowest
    min + target_span * offset / spread
  end
end

.step_file?(path) ⇒ Boolean

Returns:

  • (Boolean)


277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/rbbt/util/misc/inspect.rb', line 277

# Tells whether a path points inside the files directory of a workflow job.
#
# A path whose resource is a Step qualifies immediately. Otherwise the path
# must contain a component ending in ".files"; the component two levels above
# it is taken as the workflow name and must load through
# Workflow.require_workflow. The task component is not checked.
#
# @param path a path-like string
# @return [Boolean]
def self.step_file?(path)
  return true if defined?(Step) && Step === path.resource
  return false unless path.include?('.files/')

  parts = path.split("/")
  job = parts.find { |part| part =~ /\.files$/ }
  return false unless job

  job_index = parts.index job
  begin
    Workflow.require_workflow parts[job_index - 2]
    true
  rescue
    # A workflow that cannot be loaded is logged and treated as "not a step file"
    Log.exception $!
    false
  end
end

.string2const(string) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/rbbt/util/misc/development.rb', line 21

# Resolves a constant from its (possibly namespaced) name.
#
# The name is split on "::" and each part is looked up with const_get,
# starting from Kernel.
#
# @param string [#to_s, nil] constant name such as "File::Stat"
# @return the constant, or nil when string is nil
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) do |scope, name|
    scope.const_get name
  end
end

.string2hash(string) ⇒ Object

options end



219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# File 'lib/rbbt/util/misc/options.rb', line 219

# Parses a compact "key=value#key=value" string into an options hash.
#
# Entries are separated by "#". A key starting with ":" becomes a Symbol,
# any other key stays a String. Values are converted as follows:
#
#   (missing or empty)  => true
#   :name               => Symbol
#   /pattern/           => Regexp
#   'text' or "text"    => String without the quotes
#   123                 => Integer
#   1.5 or .5           => Float
#   true / false        => true / false
#   anything else       => the raw String
#
# "false" used to be stored as the String "false": the original
# `options[key] = false and next` never reached `next`, so the catch-all
# branch overwrote the value. The eval fallback that followed the catch-all
# was unreachable and has been removed.
#
# @param string [String] the encoded options
# @return [Hash] the decoded options
def self.string2hash(string)
  options = {}

  string.split('#').each do |str|
    key, _sep, value = str.partition "="

    key = key[1..-1].to_sym if key[0] == ":"

    options[key] =
      if value.empty?
        true
      elsif value[0] == ":"
        value[1..-1].to_sym
      elsif value[0] == "/" and value[-1] == "/"
        Regexp.new(value[1..-2])
      elsif value =~ /^['"].*['"]$/
        value[1..-2]
      elsif value =~ /^\d+$/
        value.to_i
      elsif value =~ /^\d*\.\d+$/
        value.to_f
      elsif value == "true"
        true
      elsif value == "false"
        false
      else
        value
      end
  end

  options
end

.sum(list) ⇒ Object



41
42
43
# File 'lib/rbbt/util/misc/math.rb', line 41

# Adds up the non-nil elements of a list, starting from 0.0.
#
# @param list [Array] values; nil entries are skipped
# @return [Float] the total (0.0 for an empty list)
def self.sum(list)
  present = list.compact
  present.reduce(0.0) { |total, value| total + value }
end

.swap_quoted_character(stream, charout = "\n", charin = " ", quote = '"') ⇒ Object



735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
# File 'lib/rbbt/util/misc/pipes.rb', line 735

# Copies a stream character by character, replacing one character with
# another wherever it appears inside a quoted section.
#
# A quote character toggles the quoted state unless the previously written
# character was a backslash.
#
# @param stream an IO-like object read with getc
# @param charout [String] character to replace inside quotes
# @param charin [String] replacement character
# @param quote [String] quote delimiter
# @return the value returned by Misc.open_pipe
def self.swap_quoted_character(stream, charout="\n", charin=" ", quote='"')
  Misc.open_pipe do |sin|
    begin
      inside_quotes = false
      previous = nil
      until (char = stream.getc).nil?
        inside_quotes = !inside_quotes if char == quote && previous != "\\"
        char = charin if inside_quotes && char == charout
        sin << char
        previous = char
      end
    rescue
      stream.abort if stream.respond_to? :abort
      raise $!
    ensure
      stream.join if stream.respond_to? :join
    end
  end
end

.tar(path, tarfile = nil) ⇒ Object

Creates a tar file in memory recursively from the given path.

Returns a StringIO whose underlying String is the contents of the tar file.



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/rbbt/util/tar.rb', line 14

# Builds a tar archive from everything found under a directory.
#
# Entries are stored with names relative to path and with the mode reported
# by File.stat. The walk uses the "**/*" glob.
#
# @param path [String] directory to archive
# @param tarfile [IO, nil] destination; a new StringIO when nil
# @return the destination IO, rewound to its start
def self.tar(path, tarfile = nil)
  tarfile ||= StringIO.new("")
  path_prefix = /^#{Regexp::escape path}\/?/

  Gem::Package::TarWriter.new(tarfile) do |writer|
    Dir.glob(File.join(path, "**/*")).each do |entry|
      permissions = File.stat(entry).mode
      archive_name = entry.sub(path_prefix, '')

      if File.directory?(entry)
        writer.mkdir archive_name, permissions
        next
      end

      writer.add_file(archive_name, permissions) do |tar_entry|
        tar_entry.write File.open(entry, "rb", &:read)
      end
    end
  end

  tarfile.rewind
  tarfile
end

.tarize(path, gz = nil) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/rbbt/util/tar.rb', line 37

# Archives a directory with tar and gzip-compresses the result.
#
# The tar is built from inside the directory (via Misc.in_dir), so entry
# names are relative to it.
#
# Two defects of the previous version are fixed: `StringIO.new('wb')` created
# a buffer whose *content* was "wb", and `gz.reopen('read')` reinitialised the
# StringIO with the text "read", discarding the compressed data. The writer
# is now finished rather than closed, which leaves the destination open so it
# can simply be rewound.
#
# @param path [String] directory to archive
# @param gz [IO, nil] destination for the gzip data; a new StringIO when nil
# @return the destination IO, rewound to its start
def self.tarize(path, gz = nil)
  gz ||= StringIO.new(String.new)

  tar = Misc.in_dir(path) do self.tar('.') end
  tar.rewind

  z = Zlib::GzipWriter.new(gz)
  z.write tar.string
  # finish flushes the gzip trailer without closing the underlying IO
  z.finish

  gz.rewind
  gz
end

.tee_stream_thread(stream) ⇒ Object



248
249
250
# File 'lib/rbbt/util/misc/pipes.rb', line 248

# Duplicates a stream into two output streams.
#
# Shorthand for tee_stream_thread_multiple with two copies.
#
# @param stream the stream to duplicate
# @return whatever tee_stream_thread_multiple returns for two copies
def self.tee_stream_thread(stream)
  copies = 2
  tee_stream_thread_multiple(stream, copies)
end

.tee_stream_thread_multiple(stream, num = 2) ⇒ Object Also known as: tee_stream

stream_out1, stream_out2

end



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/rbbt/util/misc/pipes.rb', line 177

# Duplicates a stream into `num` output pipes fed by a background thread.
#
# One pipe is created per copy with Misc.pipe. A splitter thread reads the
# input in blocks of BLOCK_SIZE and writes each block to every pipe. Every
# output pipe is set up as a ConcurrentStream tied to that thread; the first
# one additionally gets autojoin and a callback that joins the input stream
# and closes the write ends of the remaining pipes.
#
# @param stream the stream to duplicate (read with readpartial)
# @param num [Integer] number of copies to produce
# @return [Array] the read ends of the pipes, in creation order
def self.tee_stream_thread_multiple(stream, num = 2)
  in_pipes = []
  out_pipes = []
  # Misc.pipe yields a pair: the end handed to readers and the end written below
  num.times do 
    sout, sin = Misc.pipe
    in_pipes << sin
    out_pipes << sout
  end

  filename = stream.filename if stream.respond_to? :filename

  splitter_thread = Thread.new(Thread.current) do |parent|
    begin

      # skip[i] is set once writing to pipe i has failed with IOError, so that
      # pipe is left alone for the remaining blocks
      skip = [false] * num
      begin
        while block = stream.readpartial(BLOCK_SIZE)

          in_pipes.each_with_index do |sin,i|
            begin 
              sin.write block
            rescue IOError
              # A pipe that can no longer be written to is logged and skipped;
              # the other pipes keep receiving data
              Log.error("Tee stream #{i} #{Misc.fingerprint stream} IOError: #{$!.message} (#{Misc.fingerprint sin})");
              skip[i] = true
            rescue
              Log.error("Tee stream #{i} #{Misc.fingerprint stream} Exception: #{$!.message} (#{Misc.fingerprint sin})");
              raise $!
            end unless skip[i] 
          end
        end
      rescue IOError
        # IOError while reading ends the copy loop quietly; readpartial
        # signals end of input with EOFError, which is an IOError
      end

      stream.close unless stream.closed?
      #stream.join if stream.respond_to? :join
      # Only the first write end is closed here; the others are closed by the
      # callback installed on the main pipe below
      in_pipes.first.close unless in_pipes.first.closed?
      #Log.medium "Tee done #{Misc.fingerprint stream}"
    rescue Aborted, Interrupt
      # Propagate the abort to the input and to every output that supports it
      stream.abort if stream.respond_to? :abort
      out_pipes.each do |sout|
        sout.abort if sout.respond_to? :abort
      end
      Log.medium "Tee aborting #{Misc.fingerprint stream}"
      raise $!
    rescue Exception
      # Same cleanup for any other failure; here the exception is also handed
      # to the input stream's abort
      stream.abort($!) if stream.respond_to? :abort
      out_pipes.each do |sout|
        sout.abort if sout.respond_to? :abort
      end
      Log.medium "Tee exception #{Misc.fingerprint stream}"
      raise $!
    end
  end

  out_pipes.each do |sout|
    ConcurrentStream.setup sout, :threads => splitter_thread, :filename => filename, :_pair => stream
  end

  main_pipe = out_pipes.first
  main_pipe.autojoin = true

  # Callback on the main pipe: join the input stream and close the write ends
  # that the splitter thread left open
  main_pipe.callback = Proc.new do 
    stream.join if stream.respond_to? :join
    in_pipes[1..-1].each do |sin|
      sin.close unless sin.closed?
    end
  end

  out_pipes
end

.time_tick ⇒ Object



299
300
301
302
303
304
305
306
307
308
# File 'lib/rbbt/util/misc/development.rb', line 299

# Prints the time elapsed since the previous call.
#
# The first call only records the current time in the global
# $_last_time_tick and announces the start; each later call prints the whole
# seconds and remaining milliseconds since the last tick and resets it.
def self.time_tick
  unless $_last_time_tick.nil?
    delta = Time.now - $_last_time_tick
    whole_seconds = delta.to_i
    millis = (delta * 1000).to_i - whole_seconds * 1000
    puts "Tick ellapsed: #{whole_seconds} s. #{millis} ms"
    return $_last_time_tick = Time.now
  end

  $_last_time_tick = Time.now
  puts "Tick started: #{Time.now}"
end

.timeout_insist(time, msg = nil, &block) ⇒ Object



426
427
428
429
430
431
432
433
434
435
# File 'lib/rbbt/util/misc/development.rb', line 426

# Runs a block under a time limit inside Misc.insist.
#
# When the limit is exceeded Timeout raises TryAgain; it is logged and
# re-raised so that it reaches Misc.insist.
#
# @param time [Numeric] time limit in seconds
# @param msg [String, nil] message passed to Timeout.timeout
# @param block the code to run
# @return the value returned by Misc.insist
def self.timeout_insist(time, msg = nil, &block)
  Misc.insist do
    begin
      Timeout.timeout(time, TryAgain, msg, &block)
    rescue TryAgain => timeout_error
      Log.low "Timeout detected after #{time} seconds"
      raise timeout_error
    end
  end
end

.timespan(str, default = "s") ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/rbbt/util/misc.rb', line 73

# Converts a textual time span into a number of seconds.
#
# Accepted forms:
# * "[hh:]mm:ss" clock notation, e.g. "1:30" => 90
# * sequences of amount + unit, e.g. "1h30m", "2d", "5'30''"
# * a bare amount, which uses the default unit
# * any of the above prefixed with "-" for a negative span
#
# Units: s, sec, m, min, h, d, w, mo (31 days), y (365 days), ' (minutes)
# and '' (seconds).
#
# The unit used to be captured with \w*, which also swallowed the digits of
# the next component ("1h30m" was read as 1 of unit "h30m") and could never
# match the ' and '' units. Units are now restricted to letters and
# apostrophes.
#
# @param str [String] the time span
# @param default [String] unit applied to amounts that carry none
# @return [Integer] the span in seconds
# @raise [ArgumentError] on an unknown unit (previously an opaque TypeError
#   from multiplying by nil)
def self.timespan(str, default = "s")

  return - timespan(str[1..-1], default) if str[0] == "-"

  if str.include?(":")
    seconds, minutes, hours = str.split(":").reverse
    return seconds.to_i + minutes.to_i * 60 + hours.to_i * 60 * 60
  end

  tokens = {
    "s" => (1),
    "sec" => (1),
    "m" => (60),
    "min" => (60),
    "''" => (1),
    "'" => (60),
    "h" => (60 * 60),
    "d" => (60 * 60 * 24),
    "w" => (60 * 60 * 24 * 7),
    "mo" => (60 * 60 * 24 * 31),
    "y" => (60 * 60 * 24 * 365),
  }

  # Amounts without a unit are captured with an empty unit string
  tokens[""] = tokens[default]

  str.scan(/(\d+)([a-zA-Z']*)/).inject(0) do |time, (amount, measure)|
    factor = tokens[measure]
    raise ArgumentError, "Unknown time unit '#{measure}' in timespan: #{str}" if factor.nil?
    time + amount.to_i * factor
  end
end

.to_utf8(string) ⇒ Object



160
161
162
# File 'lib/rbbt/util/misc/format.rb', line 160

# Returns a UTF-8 copy of a string with unrepresentable data replaced by "?".
#
# The text is transcoded to UTF-16BE first, replacing invalid and undefined
# sequences, and then transcoded back to UTF-8.
#
# @param string [String]
# @return [String] the cleaned string
def self.to_utf8(string)
  utf16 = string.encode("UTF-16BE", invalid: :replace, undef: :replace, replace: "?")
  utf16.encode('UTF-8')
end

.tokenize(str) ⇒ Object



69
70
71
# File 'lib/rbbt/util/misc.rb', line 69

# Splits a string into whitespace-separated tokens, keeping quoted sections
# (single or double quotes) together. The quotes remain part of the token.
#
# @param str [String]
# @return [Array<String>] the tokens
def self.tokenize(str)
  token_pattern = /"[^"]*"|'[^']*'|[^"'\s]+/
  str.scan(token_pattern)
end

.total_length(ranges) ⇒ Object



32
33
34
# File 'lib/rbbt/util/misc/manipulation.rb', line 32

# Adds up the lengths of a set of ranges after passing them through
# collapse_ranges. Each range counts both of its ends.
#
# @param ranges [Array<Range>]
# @return [Integer] the summed length
def self.total_length(ranges)
  collapsed = self.collapse_ranges(ranges)
  collapsed.inject(0) { |total, range| total + (range.end - range.begin + 1) }
end

.translate_dna_mutation_hgvs2rbbt(cds) ⇒ Object



205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/rbbt/util/misc/omics.rb', line 205

# Translates the change part of an HGVS DNA mutation into Rbbt notation.
#
#   substitution  "123A>T"           => "T"
#   deletion      "123_125del"       => "---"   (also "del3" and "delAGT")
#   insertion     "123_124insAT"     => "+AT"   ("ins2" => "+NN")
#   delins        "123_125delinsAT"  => "---AT"
#
# Anything that cannot be interpreted yields "?(<original text>)".
#
# Changes from the previous version:
# * a delins at a single position ("123delinsAT") is one deleted base; it
#   used to produce a negative length and fall through to the error result
# * "123del" and "123ins" no longer take the position for the length
# * a leading "c."-style prefix no longer corrupts the size of the span
#
# @param cds [String] HGVS description of the change
# @return [String] the change in Rbbt notation
def self.translate_dna_mutation_hgvs2rbbt(cds)
  # Number of bases covered by "start_end" (or a lone "start")
  span_size = lambda do |span|
    first, last = span.sub(/^[a-z]\./i, '').split("_")
    last ||= first
    last.to_i - first.to_i + 1
  end

  unknown = lambda do |kind|
    Log.debug "Unknown #{kind}: #{cds}"
    "?(#{cds})"
  end

  begin
    case
    when cds =~ />/
      cds.split(">").last
    when cds =~ /delins/
      del, ins = cds.split("delins")
      if ins =~ /^[ACTG]+$/i
        ("-" * span_size.call(del)) + ins
      else
        unknown.call("delins")
      end
    when cds =~ /del/
      position, _keyword, deletion = cds.partition("del")
      deletion = deletion.chomp
      case
      when deletion.empty?
        # Nothing after "del": the length comes from the position span
        position =~ /\d/ ? "-" * span_size.call(position) : unknown.call("deletion")
      when deletion =~ /^\d+$/
        "-" * deletion.to_i
      when deletion =~ /^[ACTG]+$/i
        "-" * deletion.length
      else
        unknown.call("deletion")
      end
    when cds =~ /ins/
      insertion = cds.partition("ins").last
      case
      when insertion =~ /^\d+$/
        "+" + "N" * insertion.to_i
      when insertion =~ /^[NACTG]+$/i
        "+" + insertion
      else
        unknown.call("insertion")
      end
    else
      unknown.call("change")
    end
  rescue
    # e.g. a reversed span giving a negative repeat count
    Log.debug "Error processing change: #{cds}"
    "?(#{cds})"
  end
end

.translate_prot_mutation_hgvs2rbbt(mutation) ⇒ Object



253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/rbbt/util/misc/omics.rb', line 253

# Translates an HGVS protein mutation (one-letter codes) into Rbbt notation.
#
#   substitution  "p.A123T"       => "A123T"
#   frameshift    "p.R97fs"       => "R97Frameshift"
#   indel         "p.A12_T14del"  => "A12Indel"
#
# The first "p." found in the text is removed before matching.
#
# The argument is no longer modified in place, so the caller's string is left
# untouched and frozen strings are accepted.
#
# NOTE(review): the substitution pattern is not anchored at the end, so a
# description such as "R97Pfs*23" is returned unchanged as a substitution.
# This matches the previous behaviour; confirm whether it is intended.
#
# @param mutation [String] HGVS protein change
# @return [String, nil] the translated mutation, or nil when not understood
def self.translate_prot_mutation_hgvs2rbbt(mutation)
  one_aa_code = THREE_TO_ONE_AA_CODE.values + %w(X B Z J * ?)
  aa_class = "[#{one_aa_code * ""}]"
  substitution = Regexp.new("^#{aa_class}\\d+#{aa_class}")
  first_aa = Regexp.new("^#{aa_class}\\d+")

  mutation = mutation.sub('p.', '')

  case
  when mutation =~ substitution
    mutation
  when mutation =~ /fs/
    if (affected = mutation[first_aa])
      affected + "Frameshift"
    else
      Log.debug "Unknown Frameshift: #{mutation}"
      nil
    end
  when mutation =~ /ins|del|>/
    if (affected = mutation[first_aa])
      affected + "Indel"
    else
      Log.debug "Unknown Indel"
      nil
    end
  else
    Log.debug "Unknown change: #{mutation}"
    nil
  end
end

.try3times(&block) ⇒ Object



192
193
194
# File 'lib/rbbt/util/misc/development.rb', line 192

# Runs a block through insist with a limit of three.
#
# @param block the code to run
# @return whatever insist returns
def self.try3times(&block)
  attempts = 3
  insist(attempts, &block)
end

.txt_digest_str(txt) ⇒ Object



263
264
265
# File 'lib/rbbt/util/misc/inspect.rb', line 263

# Builds a "digest: <digest>" label for a text, using digest.
#
# @param txt the text to digest
# @return [String] the label
def self.txt_digest_str(txt)
  label = "digest: "
  label.concat(digest(txt))
end

.ungzip(tarfile) ⇒ Object

un-gzips the given IO, returning the decompressed version as a StringIO



79
80
81
82
83
84
# File 'lib/rbbt/util/tar.rb', line 79

# Decompresses a gzip stream into memory.
#
# The reader (and with it the given IO) is closed once the data has been
# read. Using the block form of GzipReader.wrap means this also happens when
# decompression fails, instead of leaving both open.
#
# @param tarfile [IO] gzip-compressed input
# @return [StringIO] the decompressed data
def self.ungzip(tarfile)
  Zlib::GzipReader.wrap(tarfile) do |z|
    StringIO.new(z.read)
  end
end

.untar(io, destination) ⇒ Object

untars the given IO into the specified directory



94
95
96
97
98
99
100
101
102
103
# File 'lib/rbbt/util/tar.rb', line 94

# Extracts a tar archive into a directory.
#
# A Path is resolved with find first. A String naming an existing file is
# opened with Open.open and the resulting stream is extracted; anything else
# is handed to _untar_cmd directly.
#
# @param io the archive: a Path, a file name or a stream
# @param destination the directory to extract into
def self.untar(io, destination)
  io = io.find if Path === io
  return _untar_cmd(io, destination) unless String === io && File.exist?(io)

  Open.open(io) do |f|
    untar(f, destination)
  end
end

.unzip_in_dir(file, dir) ⇒ Object



404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# File 'lib/rbbt/util/misc/development.rb', line 404

# Extracts a zip file into a directory using the `unzip` command.
#
# When Open.remote? reports the file as remote, its content is first saved to
# a temporary ".zip" file; otherwise the file is used in place. A Path is
# resolved with find in both cases.
#
# @param file the zip archive (local file, Path or remote location)
# @param dir [String] target directory
# @raise [RuntimeError] if dir exists and is not a directory. The message now
#   names dir; it used to show the zip file instead of the offending target.
def self.unzip_in_dir(file, dir)
  raise "Target is not a directory: #{dir}" if File.exist?(dir) and not File.directory?(dir)
  if Open.remote? file
    file = file.find if Path === file
    Open.open(file) do |stream|
      TmpFile.with_file(stream.read, true, :extension => 'zip') do |zip_file|
        CMD.cmd("unzip '#{zip_file}' -d '#{dir}'")
      end
    end
  else
    file = file.find if Path === file
    zip_file = file
    CMD.cmd("unzip '#{zip_file}' -d '#{dir}'")
  end
end

.use_lock_id=(use = true) ⇒ Object



2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/rbbt/util/misc/lock.rb', line 2

# Switches Lockfile between its two configurations.
#
# With a truthy value lock ids are used and the refresh, max_age and suspend
# settings are 2, 30 and 4; otherwise lock ids are disabled and the settings
# are 4, 60 and 8. The change is logged at medium level.
#
# @param use whether lock ids should be used
def self.use_lock_id=(use = true)
  if use
    Log.medium "Activating lockfile ids"
    skip_ids, refresh, max_age, suspend = false, 2, 30, 4
  else
    Log.medium "De-activating lockfile ids"
    skip_ids, refresh, max_age, suspend = true, 4, 60, 8
  end

  Lockfile.dont_use_lock_id = skip_ids
  Lockfile.refresh = refresh
  Lockfile.max_age = max_age
  Lockfile.suspend = suspend
end

.variance(list) ⇒ Object



55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/rbbt/util/misc/math.rb', line 55

# Computes the sample variance (n - 1 denominator) of a list.
#
# nil entries are dropped before measuring distances to the mean; the mean
# itself is obtained from mean on the list as given.
#
# @param list [Array] numeric values, possibly containing nil
# @return [Float, nil] the variance, or nil for lists shorter than 3
def self.variance(list)
  return nil if list.length < 3

  average = mean(list)
  values = list.compact

  squared_error = values.inject(0.0) do |acc, value|
    delta = value.to_f - average
    acc + delta * delta
  end

  squared_error / (values.length - 1)
end

.with_env(var, value, &block) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/system.rb', line 29

# Runs a block with an environment variable temporarily set.
#
# Name and value are converted to strings. The previous value is put back
# when the block finishes, even if it raises.
#
# @param var [#to_s] variable name
# @param value [#to_s] temporary value
# @return the value of the block
def self.with_env(var, value, &block)
  name = var.to_s
  previous = ENV[name]
  begin
    ENV[name] = value.to_s
    yield
  ensure
    ENV[name] = previous
  end
end

.with_fifo(path = nil, clean = true, &block) ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
# File 'lib/rbbt/util/misc/pipes.rb', line 38

# Creates a named pipe (FIFO) and yields its path.
#
# Without a path a temporary one is obtained from TmpFile.tmp_file and the
# FIFO is removed once the block finishes; a FIFO at a caller-supplied path
# is left in place.
#
# An existing file at the path is removed first when clean is set. This used
# to call File.rm, which does not exist, so the method failed with
# NoMethodError whenever something was already there.
#
# @param path [String, nil] where to create the FIFO
# @param clean [Boolean] remove an existing file at path before creating it
# @yieldparam path [String] the FIFO path
# @return the value of the block
def self.with_fifo(path = nil, clean = true, &block)
  erase = path.nil?
  path = TmpFile.tmp_file if path.nil?
  begin
    FileUtils.rm path if clean && File.exist?(path)
    File.mkfifo path
    yield path
  ensure
    FileUtils.rm path if erase && File.exist?(path)
  end
end

.zip2hash(list1, list2) ⇒ Object



33
34
35
36
37
38
39
# File 'lib/rbbt/util/misc/options.rb', line 33

# Builds a hash pairing each element of one list with the element at the
# same position in another.
#
# @param list1 [Array] keys
# @param list2 [Array] values, looked up by index
# @return [Hash]
def self.zip2hash(list1, list2)
  list1.each_with_index.each_with_object({}) do |(key, position), paired|
    paired[key] = list2[position]
  end
end

.zip_fields(array) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/rbbt/util/misc/objects.rb', line 78

# Zips a list of rows with _zip_fields, working in slices for large inputs.
#
# Inputs shorter than 10000 rows are zipped in one call. Longer inputs are
# zipped 10000 rows at a time, passing the longest row length to every call,
# and the per-slice results are concatenated position by position.
#
# @param array [Array<Array>] the rows
# @return [Array<Array>] the zipped result
def self.zip_fields(array)
  return _zip_fields(array) if array.length < 10000

  widest = array.collect{|l| l.length}.max
  merged, *remaining = array.each_slice(10000).map { |slice| _zip_fields(slice, widest) }

  remaining.each do |zipped|
    zipped.each_with_index { |column, i| merged[i].concat column }
  end

  merged
end

.zscore(e, list) ⇒ Object



133
134
135
136
137
# File 'lib/rbbt/util/misc/math.rb', line 133

# Expresses a value as its distance from the mean of a list, measured in
# units of Misc.sd of that list.
#
# @param e [Numeric] the value
# @param list [Array] reference values
# @return [Float]
def self.zscore(e, list)
  center = Misc.mean(list)
  spread = Misc.sd(list)
  deviation = e.to_f - center
  deviation / spread
end