Module: Misc

Defined in:
lib/rbbt/util/misc.rb

Defined Under Namespace

Classes: FieldNotFoundError

Constant Summary collapse

# Palette of hex color strings; colors_for hands them out in this order.
COLOR_LIST =
%w(#BC80BD #CCEBC5 #FFED6F #8DD3C7 #FFFFB3 #BEBADA #FB8072 #80B1D3 #FDB462 #B3DE69 #FCCDE5 #D9D9D9)
# Factor that turns a natural logarithm into a base-2 logarithm (see log2).
Log2Multiplier =
1.0 / Math.log(2.0)
# Length above which remove_long_items truncates arrays and hashes.
ARRAY_MAX_LENGTH =
1000
# Length above which remove_long_items truncates strings.
STRING_MAX_LENGTH =
ARRAY_MAX_LENGTH * 10
# IUPAC nucleotide code => list of the concrete bases it may stand for.
IUPAC2BASE =
{
  "A" => ["A"],
  "C" => ["C"],
  "G" => ["G"],
  "T" => ["T"],
  "U" => ["U"],
  "R" => "A or G".split(" or "),
  "Y" => "C or T".split(" or "),
  "S" => "G or C".split(" or "),
  "W" => "A or T".split(" or "),
  "K" => "G or T".split(" or "),
  "M" => "A or C".split(" or "),
  "B" => "C or G or T".split(" or "),
  "D" => "A or G or T".split(" or "),
  "H" => "A or C or T".split(" or "),
  "V" => "A or C or G".split(" or "),
  "N" => %w(A C T G),
}
# Base => complementary base ("U" is mapped to "A" like "T").
BASE2COMPLEMENT =
{
  "A" => "T",
  "C" => "G",
  "G" => "C",
  "T" => "A",
  "U" => "A",
}
# Lower-case three-letter amino acid code => one-letter code.
THREE_TO_ONE_AA_CODE =
{
  "ala" =>   "A",
  "arg" =>   "R",
  "asn" =>   "N",
  "asp" =>   "D",
  "cys" =>   "C",
  "glu" =>   "E",
  "gln" =>   "Q",
  "gly" =>   "G",
  "his" =>   "H",
  "ile" =>   "I",
  "leu" =>   "L",
  "lys" =>   "K",
  "met" =>   "M",
  "phe" =>   "F",
  "pro" =>   "P",
  "ser" =>   "S",
  "thr" =>   "T",
  "trp" =>   "W",
  "tyr" =>   "Y",
  "val" =>   "V"
}
# Object whose load/dump methods lock_in_repo uses to (de)serialize lock info.
LOCK_REPO_SERIALIZER =
Marshal
# Strings longer than this are cut before being fed into hash2md5.
HASH2MD5_MAX_STRING_LENGTH =
1000
# Arrays longer than this are cut before being fed into hash2md5.
HASH2MD5_MAX_ARRAY_LENGTH =
100

Class Method Summary collapse

Class Method Details

.add_defaults(options, defaults = {}) ⇒ Object



1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
# File 'lib/rbbt/util/misc.rb', line 1010

# Returns a copy of +options+ completed with every entry of +defaults+ whose
# key is not already present.
#
# options  - a Hash, or a String that string2hash turns into a Hash.
# defaults - Hash of fallback key/value pairs (default: {}).
#
# The +options+ argument itself is not modified. Raises a RuntimeError when
# +options+ is neither a Hash nor a String.
def self.add_defaults(options, defaults = {})
  new_options = case options
                when Hash then options.dup
                when String then string2hash(options)
                else
                  raise "Format of '#{options.inspect}' not understood. It should be a hash"
                end

  defaults.each do |key, value|
    # Check the parsed hash, not the raw argument: for a String argument
    # String#include? would do a substring search (and raise on Symbol keys).
    next if new_options.include? key

    new_options[key] = value
  end

  new_options
end

.array2hash(array, default = nil) ⇒ Object



637
638
639
640
641
642
643
644
# File 'lib/rbbt/util/misc.rb', line 637

# Builds a Hash from a list of [key, value] pairs.
#
# When a pair has a nil value and +default+ is not nil, a fresh copy
# (default.dup) of +default+ is stored for that key instead.
def self.array2hash(array, default = nil)
  array.each_with_object({}) do |(key, value), result|
    result[key] = (value.nil? && !default.nil?) ? default.dup : value
  end
end

.benchmark(repeats = 1) ⇒ Object



669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
# File 'lib/rbbt/util/misc.rb', line 669

# Runs the given block +repeats+ times under Benchmark.measure, prints the
# timing to standard output and returns the value of the last block run.
#
# If the block raises, "Benchmark aborted" is printed and the exception is
# re-raised unchanged.
def self.benchmark(repeats = 1)
  require 'benchmark'
  last_value = nil
  begin
    timing = Benchmark.measure { repeats.times { last_value = yield } }
    puts "Benchmark for #{ repeats } repeats"
    puts timing
  rescue Exception
    puts "Benchmark aborted"
    raise
  end
  last_value
end

.binary_include?(array, elem) ⇒ Boolean

Returns:

  • (Boolean)


610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
# File 'lib/rbbt/util/misc.rb', line 610

# Binary search for +elem+ in +array+, which must already be sorted in
# ascending <=> order.
#
# Returns true when an element comparing equal to +elem+ is found and false
# otherwise. An empty array yields false (it used to yield the truthy -1).
# Raises a RuntimeError when +elem+ cannot be compared with a probed element.
def self.binary_include?(array, elem)
  lower = 0
  upper = array.size - 1

  while upper >= lower
    idx = lower + (upper - lower) / 2
    value = array[idx]

    case elem <=> value
    when 0 then return true
    when -1 then upper = idx - 1
    when 1 then lower = idx + 1
    else
      raise "Cannot compare #{[elem.inspect, value.inspect] * " with "}"
    end
  end

  false
end

.collapse_ranges(ranges) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/rbbt/util/misc.rb', line 87

# Merges overlapping ranges.
#
# The ranges are sorted by their begin; a range whose begin is not greater
# than the end of the previously emitted range is merged into it. Merged
# ranges are returned as inclusive begin..end ranges, untouched ranges are
# returned as given. Ranges that merely touch (1..2 and 3..4) stay separate.
#
# Because the input is processed in begin order, only the most recently
# emitted range can overlap the current one, so it is merged in place instead
# of rebuilding the whole result list for every overlap.
def self.collapse_ranges(ranges)
  processed = []
  ranges.sort_by{|range| range.begin }.each do |range|
    current = processed.last
    if current.nil? or range.begin > current.end
      processed << range
    else
      processed[-1] = (current.begin..[range.end, current.end].max)
    end
  end

  processed
end

.colors_for(list) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/rbbt/util/misc.rb', line 70

# Assigns a color from COLOR_LIST to every distinct element of +list+, in
# order of first appearance.
#
# Returns [colors, used]: +colors+ has one entry per element of +list+, and
# +used+ maps each distinct element to its color. Once the palette is
# exhausted, further distinct elements get nil.
def self.colors_for(list)
  palette = COLOR_LIST.dup
  assigned = {}

  colors = list.map do |elem|
    assigned[elem] = palette.shift unless assigned.include? elem
    assigned[elem]
  end

  [colors, assigned]
end

.common_path(dir, file) ⇒ Object



932
933
934
935
936
937
938
939
940
941
942
943
# File 'lib/rbbt/util/misc.rb', line 932

# Tells whether +dir+ is +file+ itself or one of its ancestor directories.
# Both arguments are expanded with File.expand_path before comparing.
#
# Returns true or false.
def self.common_path(dir, file)
  target = File.expand_path dir
  current = File.expand_path file

  loop do
    return true if current == target

    parent = File.dirname(current)
    # dirname is a fixed point at the filesystem root: nothing left to climb.
    return false if parent == current

    current = parent
  end
end

.consolidate(list) ⇒ Object



327
328
329
330
331
332
333
334
335
336
# File 'lib/rbbt/util/misc.rb', line 327

# Concatenates all the elements of +list+ (anything supporting #concat, such
# as arrays or strings) into a single object.
#
# Returns nil for an empty list. The accumulator starts as a copy of the
# first non-nil element so that the caller's own objects are left untouched
# (previously the first element was extended in place).
def self.consolidate(list)
  list.inject(nil) do |acc, e|
    if acc.nil?
      e.nil? ? nil : e.dup
    else
      acc.concat e
      acc
    end
  end
end

.correct_icgc_mutation(pos, ref, mut_str) ⇒ Object



25
26
27
28
29
30
# File 'lib/rbbt/util/misc.rb', line 25

# Normalizes a mutation string to the notation used by the rest of this
# module.
#
# A +mut_str+ that starts with "-" followed by a base becomes a run of "-"
# one character shorter than +mut_str+; when +ref+ is "-" the mutation is
# prefixed with "+".
#
# Returns [pos, [mutation]]; +pos+ is passed through unchanged.
def self.correct_icgc_mutation(pos, ref, mut_str)
  mut = if mut_str =~ /^-[ACGT]/
          '-' * (mut_str.length - 1)
        else
          mut_str
        end
  mut = "+" << mut if ref == '-'

  [pos, [mut]]
end

.correct_vcf_mutation(pos, ref, mut_str) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/rbbt/util/misc.rb', line 32

# Converts a VCF-style reference/alternative pair into this module's
# mutation notation.
#
# +mut_str+ is a comma-separated list of alternatives (nil means none).
# While every alternative starts with the first character of +ref+, that
# character is dropped from +ref+ and from each alternative and +pos+ is
# advanced by one. Each remaining alternative is then rewritten:
#   * reference exhausted          -> "+" followed by the alternative
#   * alternative shorter than ref and empty or contained in it
#                                  -> one "-" per missing character
#   * single base vs single base   -> the alternative itself
#   * anything else                -> logged, then one "-" per reference
#                                     character followed by the alternative
#
# Returns [pos, alternatives].
def self.correct_vcf_mutation(pos, ref, mut_str)
  muts = mut_str.nil? ? [] : mut_str.split(',')

  while ref.length >= 1 and muts.all?{|m| m[0] == ref[0]}
    ref = ref[1..-1]
    pos = pos + 1
    muts = muts.collect{|m| m[1..-1]}
  end

  muts = muts.collect do |m|
    if ref.empty?
      "+" << m
    elsif m.length < ref.length and (m.empty? or ref.index(m))
      "-" * (ref.length - m.length)
    elsif ref.length == 1 and m.length == 1
      m
    else
      Log.debug{"Cannot understand: #{[ref, m]} (#{ muts })"}
      '-' * ref.length + m
    end
  end

  [pos, muts]
end

.counts(array) ⇒ Object



370
371
372
373
374
375
376
377
378
# File 'lib/rbbt/util/misc.rb', line 370

# Counts how many times each distinct element occurs in +array+.
#
# Returns a plain Hash (no default value) mapping element => occurrences.
def self.counts(array)
  array.each_with_object({}) do |elem, tally|
    tally[elem] = (tally[elem] || 0) + 1
  end
end

.digest(text) ⇒ Object



1029
1030
1031
# File 'lib/rbbt/util/misc.rb', line 1029

# Returns the MD5 digest of +text+ as a hexadecimal string.
def self.digest(text)
  md5 = Digest::MD5.new
  md5 << text
  md5.hexdigest
end

.divide(array, num) ⇒ Object

Divides the array into num chunks of the same size by placing one element in each chunk iteratively.



1259
1260
1261
1262
1263
1264
1265
1266
1267
# File 'lib/rbbt/util/misc.rb', line 1259

# Distributes the elements of +array+ round-robin over +num+ chunks: element
# i goes to chunk i % num, so chunk sizes differ by at most one.
#
# +num+ is converted with to_i once and that value is used both for the
# number of chunks and for the modulo (the raw value used to be used for the
# latter). Returns an Array of +num+ Arrays. Raises ZeroDivisionError when
# the converted +num+ is zero and +array+ is not empty.
def self.divide(array, num)
  count = num.to_i
  chunks = Array.new(count) { [] }
  array.each_with_index do |e, i|
    chunks[i % count] << e
  end
  chunks
end

.do_once(&block) ⇒ Object



759
760
761
762
763
764
# File 'lib/rbbt/util/misc.rb', line 759

# Runs the given block unless the global flag $__did_once is already set.
# The flag is set before the block runs and is shared by every caller, so
# only the first call executes anything until the flag is cleared again.
#
# Always returns nil.
def self.do_once(&block)
  unless $__did_once
    $__did_once = true
    yield
  end
  nil
end

.ensembl_server(organism) ⇒ Object



268
269
270
271
272
273
274
275
# File 'lib/rbbt/util/misc.rb', line 268

# Picks the Ensembl host name for an organism code.
#
# When +organism+ has a second "/"-separated component, that component is
# used as the archive sub-domain; otherwise the main site is returned.
def self.ensembl_server(organism)
  _name, date = organism.split("/")
  date.nil? ? "www.ensembl.org" : "#{ date }.archive.ensembl.org"
end

.env_add(var, value, sep = ":", prepend = true) ⇒ Object



659
660
661
662
663
664
665
666
667
# File 'lib/rbbt/util/misc.rb', line 659

# Adds +value+ to the +sep+-separated list held in the environment variable
# +var+, unless it is already one of its entries.
#
# var     - name of the environment variable (created if missing).
# value   - entry to add.
# sep     - separator between entries (default ":"); matched literally.
# prepend - true to put the entry first, false to put it last.
#
# Returns nil when the entry was already present, the new value of the
# variable otherwise.
def self.env_add(var, value, sep = ":", prepend = true)
  ENV[var] ||= ""
  current = ENV[var]
  quoted_sep = Regexp.quote sep
  return if current =~ /(#{quoted_sep}|^)#{Regexp.quote value}(#{quoted_sep}|$)/

  ENV[var] = if current.empty?
               # No separator needed: avoids a dangling empty entry.
               value
             elsif prepend
               value + sep + current
             else
               # The append branch used to add the old content again
               # instead of +value+.
               current + sep + value
             end
end

.fast_align(reference, sequence) ⇒ Object



448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
# File 'lib/rbbt/util/misc.rb', line 448

# Aligns +sequence+ against +reference+ by filling a score matrix with
# dynamic programming and tracing back from the best cell.
#
# Requires the narray gem. Returns a two-element Array
# [aligned_reference, aligned_sequence] where "-" marks a gap.
def self.fast_align(reference, sequence)
  require 'narray'
  # Scoring scheme: border cells cost init_gap per position, inner gaps cost
  # gap, mismatches cost diff and matches earn same.
  init_gap = -1
  gap = -2
  diff = -2
  same = 2

  cols = sequence.length + 1
  rows = reference.length + 1

  # a[spos, rpos]: score for the first spos characters of the sequence
  # against the first rpos characters of the reference.
  a = NArray.int(cols, rows)


  for spos in 0..cols-1 do a[spos, 0] = spos * init_gap end
  for rpos in 0..rows-1 do a[0, rpos] = rpos * init_gap end

  spos = 1
  while spos < cols do
    rpos = 1
    while rpos < rows do
      match = a[spos-1,rpos-1] + (sequence[spos-1] != reference[rpos-1] ? diff : same)
      skip_sequence = a[spos-1,rpos] + gap
      skip_reference = a[spos,rpos-1] + gap
      a[spos,rpos] = [match, skip_sequence, skip_reference].max
      rpos += 1
    end
    spos += 1
  end

  # Start the traceback at the best score among the cells where the whole
  # sequence has been consumed (last spos index), at whichever reference
  # position that happens; the first such position wins on ties.
  start = Misc.max(a[-1,0..rows-1])
  start_pos = a[-1,0..rows-1].to_a.index start

  # Both strings are built back to front and reversed at the end.
  ref = ''
  seq = ''
  rpos = start_pos
  spos = cols - 1

  while spos > 0 and rpos > 0
    score = a[spos,rpos]
    score_match = a[spos-1,rpos-1]
    score_skip_reference = a[spos,rpos-1]
    score_skip_sequence = a[spos-1,rpos]

    # Find which of the three moves produced this cell's score.
    case
    when score == score_match + (sequence[spos-1] != reference[rpos-1] ? diff : same)
      ref << reference[rpos-1]
      seq << sequence[spos-1]
      spos -= 1
      rpos -= 1
    when score == score_skip_reference + gap
      ref << reference[rpos-1]
      seq << '-'
      rpos -= 1
    when score == score_skip_sequence + gap
      seq << sequence[spos-1]
      ref << '-'
      spos -= 1
    else
      # No move explains the score: the matrix is inconsistent.
      raise "stop"
    end
  end

  # Leftover reference prefix is paired with gaps in the sequence.
  while (rpos > 0)
    ref << reference[rpos-1]
    seq = seq << '-'
    rpos -= 1    
  end

  # Leftover sequence prefix is paired with gaps in the reference.
  while (spos > 0)
    seq << sequence[spos-1]
    ref = ref + '-'
    spos -= 1
  end
  
  # Append the part of the reference after the traceback start, padding the
  # sequence with the same number of gaps.
  [ref.reverse + reference[start_pos..-1], seq.reverse + '-' * (rows - start_pos - 1)]
end

.field_position(fields, field, quiet = false) ⇒ Object

Raises:

  • (FieldNotFoundError)



1248
1249
1250
1251
1252
1253
1254
1255
# File 'lib/rbbt/util/misc.rb', line 1248

# Resolves +field+ to its position inside +fields+.
#
# fields - list of field names, or nil when unknown.
# field  - an Integer or Range (returned as is) or a field name.
# quiet  - when true, a failed lookup returns nil instead of raising.
#
# An exact match is tried first; then a case-insensitive whole-name match,
# for which +field+ is interpolated into a regular expression without
# quoting. Raises FieldNotFoundError when +fields+ is nil or the field is
# missing, unless +quiet+ is set.
def self.field_position(fields, field, quiet = false)
  return field if Integer === field or Range === field

  if fields.nil?
    raise FieldNotFoundError, "Field information missing" unless quiet
    # Quiet lookups used to fall through and crash on nil.each_with_index.
    return nil
  end

  fields.each_with_index{|f,i| return i if f == field}

  field_re = /^#{field}$/i
  # Not every object implements =~ (e.g. Integer on Ruby >= 3.2).
  fields.each_with_index{|f,i| return i if f.respond_to?(:=~) and f =~ field_re}

  raise FieldNotFoundError, "Field #{ field.inspect } was not found" unless quiet
  nil
end

.filename?(string) ⇒ Boolean

Returns:

  • (Boolean)


277
278
279
# File 'lib/rbbt/util/misc.rb', line 277

# Tells whether +string+ is a non-empty String shorter than 250 characters
# that names an existing file system entry.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def self.filename?(string)
  String === string and string.length > 0 and string.length < 250 and File.exist?(string)
end

.fingerprint(obj) ⇒ Object



207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/rbbt/util/misc.rb', line 207

# Builds a short, human-readable summary string for +obj+.
#
#   nil     -> "nil"
#   Symbol  -> ":name"
#   String  -> quoted; strings over 100 characters keep their first 20 and
#              last 10 characters around a "<...length...>" marker
#   Array   -> fingerprints of the elements; arrays over 10 elements show
#              the length and five sampled elements (first two, middle, last
#              two)
#   TSV     -> fingerprints of its fields and keys
#   Hash    -> key=>fingerprint pairs, or fingerprints of keys and values
#              when it has more than 10 entries
#   other   -> obj.to_s
def self.fingerprint(obj)
  case obj
  when nil
    "nil"
  when Symbol
    ":#{obj}"
  when String
    if obj.length > 100
      "'#{obj[0..20-1]}<...#{obj.length}...>#{obj[-10..-1]} '"
    else
      "'#{obj}'"
    end
  when Array
    size = obj.length
    if size > 10
      sampled = obj.values_at(0, 1, size / 2, -2, -1).collect{|e| fingerprint(e)}
      "[#{size}-#{sampled * ","}]"
    else
      "[#{obj.collect{|e| fingerprint(e) } * ","}]"
    end
  when TSV
    obj.with_unnamed do
      fields_part = fingerprint(obj.all_fields || []).inspect
      keys_part = fingerprint(obj.keys).inspect
      "TSV:{#{fields_part};#{keys_part}}"
    end
  when Hash
    if obj.length > 10
      "H:{#{fingerprint(obj.keys)};#{fingerprint(obj.values)}}"
    else
      entries = obj.collect{|k, v| "#{k}=>#{fingerprint(v)} " }
      "{#{entries.join}}"
    end
  else
    obj.to_s
  end
end

.fixascii(string) ⇒ Object



975
976
977
978
979
980
981
# File 'lib/rbbt/util/misc.rb', line 975

# Passes +string+ through fixutf8 and re-encodes the result as ASCII-8BIT.
# Objects that do not respond to #encode are returned untouched.
def self.fixascii(string)
  return string unless string.respond_to?(:encode)

  fixutf8(string).encode("ASCII-8BIT")
end

.fixutf8(string) ⇒ Object



963
964
965
966
967
968
969
970
971
972
973
# File 'lib/rbbt/util/misc.rb', line 963

# Returns +string+ with invalid byte sequences replaced.
#
# Strings that report a valid encoding are returned as is. Otherwise the
# string is round-tripped through UTF-16BE with invalid and undefined
# characters replaced by "?". Objects without #encode are run through Iconv
# instead.
def self.fixutf8(string)
  already_valid = (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
    (string.respond_to?(:valid_encoding) && string.valid_encoding)
  return string if already_valid

  unless string.respond_to?(:encode)
    require 'iconv'
    @@ic ||= Iconv.new('UTF-8//IGNORE', 'UTF-8')
    return @@ic.iconv(string)
  end

  utf16 = string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?")
  utf16.encode('UTF-8')
end

.GET_params2hash(string) ⇒ Object



795
796
797
798
799
800
801
802
# File 'lib/rbbt/util/misc.rb', line 795

# Parses a query string such as "a=1&b=x%20y" into a Hash.
#
# Items are separated by "&". Each item is split at its first "=" only, so
# values that themselves contain "=" are kept whole. Values are
# CGI-unescaped; an item without a value maps to "". Keys are stored as
# found.
def self.GET_params2hash(string)
  hash = {}
  string.split('&').each do |item|
    key, value = item.split("=", 2)
    hash[key] = value.nil? ? "" : CGI.unescape(value)
  end
  hash
end

.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil) ⇒ Object



290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'lib/rbbt/util/misc.rb', line 290

# Builds a Google Chart URL for a three-set Venn diagram.
#
# list1..list3 - the three arrays to compare.
# name1..name3 - legend names (default "list 1" .. "list 3").
# total        - optional reference size (a number, or an Array whose length
#                is used); when absent the largest size is the reference.
#
# The seven data values (three set sizes, three pairwise intersections and
# the triple intersection) are expressed as fractions of the reference size,
# truncated to two decimals. A reference size of zero (e.g. all lists empty)
# yields fractions of 0.0 instead of raising FloatDomainError.
#
# Returns the URL as a String; the legend text is inserted unescaped.
def self.google_venn(list1, list2, list3, name1 = nil, name2 = nil, name3 = nil, total = nil)
  name1 ||= "list 1"
  name2 ||= "list 2"
  name3 ||= "list 3"

  sizes = [list1, list2, list3, list1 & list2, list1 & list3, list2 & list3, list1 & list2 & list3].collect{|l| l.length}

  total = total.length if Array === total

  label = "#{name1}: #{sizes[0]} (#{name2}: #{sizes[3]}, #{name3}: #{sizes[4]})"
  label << "|#{name2}: #{sizes[1]} (#{name1}: #{sizes[3]}, #{name3}: #{sizes[5]})"
  label << "|#{name3}: #{sizes[2]} (#{name1}: #{sizes[4]}, #{name2}: #{sizes[5]})"
  if total
    label << "| INTERSECTION: #{sizes[6]} TOTAL: #{total}"
  else
    label << "| INTERSECTION: #{sizes[6]}"
  end

  max = total || sizes.max
  sizes = sizes.collect do |v|
    max.zero? ? 0.0 : (v.to_f/max * 100).to_i.to_f / 100
  end

  "https://chart.googleapis.com/chart?cht=v&chs=500x300&chd=t:#{sizes * ","}&chco=FF6342,ADDE63,63C6DE,FFFFFF&chdl=#{label}"
end

.hash2GET_params(hash) ⇒ Object



804
805
806
807
808
809
810
811
812
813
814
815
816
817
# File 'lib/rbbt/util/misc.rb', line 804

# Serializes +hash+ into a "key=value&key=value" string, ordered by the
# string form of the keys.
#
# Only values whose class name is in the list below are emitted; everything
# else (nil included) is skipped. Symbols are written as their name, arrays
# as their elements joined with ",", and any other value as the CGI-escaped
# result of to_s. Keys are written unescaped.
def self.hash2GET_params(hash)
  accepted = %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object Array)

  pairs = hash.sort_by{|key, _value| key.to_s}.collect do |key, value|
    next unless accepted.include? value.class.to_s

    encoded = if Symbol === value
                value.to_s
              elsif Array === value
                value * ","
              else
                CGI.escape(value.to_s)
              end

    [(Symbol === key ? key.to_s : key), encoded] * "="
  end

  pairs.compact * "&"
end

.hash2md5(hash) ⇒ Object



1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
# File 'lib/rbbt/util/misc.rb', line 1035

# Computes a digest that identifies the content of +hash+.
#
# The keys are sorted by their string form and each key/value pair is
# appended to a buffer in a form that depends on the class of the value; the
# buffer is finally passed to digest. Returns "" when the buffer is empty.
#
# NOTE(review): the exact text fed to digest determines the result, so any
# change to the formatting below changes the digests that are produced.
def self.hash2md5(hash)
  str = ""
  keys = hash.keys
  keys = keys.clean_annotations if keys.respond_to? :clean_annotations
  keys = keys.sort_by{|k| k.to_s}

  # Hashes that support it are switched to "unnamed" mode while reading the
  # values; the previous setting is restored after the loop (not on error).
  if hash.respond_to? :unnamed
    unnamed = hash.unnamed
    hash.unnamed = true 
  end
  keys.each do |k|
    # These options never take part in the digest.
    next if k == :monitor or k == "monitor" or k == :in_situ_persistence or k == "in_situ_persistence"
    v = hash[k]
    case
    when TrueClass === v
      str << k.to_s << "=>true" 
    when FalseClass === v
      str << k.to_s << "=>false" 
    when Hash === v
      # Nested hashes contribute their own digest.
      str << k.to_s << "=>" << hash2md5(v)
    when Symbol === v
      str << k.to_s << "=>" << v.to_s
    when (String === v and v.length > HASH2MD5_MAX_STRING_LENGTH)
      # Long strings contribute a prefix plus their length. The inclusive
      # range keeps HASH2MD5_MAX_STRING_LENGTH + 1 characters.
      str << k.to_s << "=>" << v[0..HASH2MD5_MAX_STRING_LENGTH] << "; #{ v.length }"
    when String === v
      str << k.to_s << "=>" << v
    when (Array === v and v.length > HASH2MD5_MAX_ARRAY_LENGTH)
      # Long arrays contribute their first elements plus their length; the
      # inclusive range keeps HASH2MD5_MAX_ARRAY_LENGTH + 1 elements.
      str << k.to_s << "=>[" << v[0..HASH2MD5_MAX_ARRAY_LENGTH] * "," << "; #{ v.length }]"
    when Array === v
      str << k.to_s << "=>[" << v * "," << "]"
    else
      v_ins = v.inspect

      case
      when v_ins =~ /:0x0/
        # Removes the first ":0x<hex>@" fragment; presumably this strips an
        # object address so the digest is stable across runs - TODO confirm.
        str << k.to_s << "=>" << v_ins.sub(/:0x[a-f0-9]+@/,'')
      else
        str << k.to_s << "=>" << v_ins
      end

    end

    # Annotated values also contribute the digest of their info hash.
    str << "_" << hash2md5(v.info) if defined? Annotated and Annotated === v
  end
  hash.unnamed = unnamed if hash.respond_to? :unnamed

  if str.empty?
    ""
  else
    digest(str)
  end
end

.hash2string(hash) ⇒ Object



787
788
789
790
791
792
793
# File 'lib/rbbt/util/misc.rb', line 787

# Serializes +hash+ into a "key=value#key=value" string, ordered by the
# string form of the keys.
#
# Only values whose class name is in the list below are emitted. Symbol keys
# and Symbol values are written with a leading ":".
def self.hash2string(hash)
  accepted = %w(Symbol String Float Fixnum Integer TrueClass FalseClass Module Class Object)

  pairs = hash.sort_by{|key, _value| key.to_s}.collect do |key, value|
    next unless accepted.include? value.class.to_s

    key_text = Symbol === key ? ":" << key.to_s : key
    value_text = Symbol === value ? ":" << value.to_s : value
    [key_text, value_text] * "="
  end

  pairs.compact * "#"
end

.hash_to_html_tag_attributes(hash) ⇒ Object



819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
# File 'lib/rbbt/util/misc.rb', line 819

# Renders +hash+ as a string of HTML attributes (key='value' ...).
#
# Entries with a nil key, a nil value or an empty String value are skipped.
# Arrays are joined with spaces; Strings, Symbols, true, Integers and Floats
# are written through to_s; any other value (false included) is skipped.
# Values are inserted as they are, without HTML escaping.
#
# Integer is tested instead of Fixnum, a constant removed in Ruby 3.2.
#
# Returns "" for a nil or empty hash.
def self.hash_to_html_tag_attributes(hash)
  return "" if hash.nil? or hash.empty?

  hash.collect{|k,v|
    next if k.nil? or v.nil? or (String === v and v.empty?)

    case v
    when Array
      [k, "'" << v * " " << "'"] * "="
    when String, Symbol, TrueClass, Integer, Float
      [k, "'" << v.to_s << "'"] * "="
    end
  }.compact * " "
end

.hostname ⇒ Object



867
868
869
# File 'lib/rbbt/util/misc.rb', line 867

# Returns the host name reported by the `hostname` command, stripped of
# surrounding whitespace. The command is run once and the result is memoized
# (the memo variable used to be misspelled @hostanem).
def self.hostname
  @hostname ||= `hostname`.strip
end

.html_tag(tag, content = nil, params = {}) ⇒ Object



841
842
843
844
845
846
847
848
849
850
851
# File 'lib/rbbt/util/misc.rb', line 841

# Renders an HTML element.
#
# tag     - element name.
# content - inner text; nil produces a self-closing tag.
# params  - attributes, rendered by hash_to_html_tag_attributes.
#
# Returns the markup as a String.
def self.html_tag(tag, content = nil, params = {})
  attr_str = hash_to_html_tag_attributes(params)
  attr_str = " " << attr_str if String === attr_str and attr_str != ""

  return "<#{ tag }#{attr_str}/>" if content.nil?

  "<#{ tag }#{attr_str}>#{ content }</#{ tag }>"
end

.humanize(value, options = {}) ⇒ Object

source: gist.github.com/ekdevdes/2450285 author: Ethan Kramer (github.com/ekdevdes)



1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
# File 'lib/rbbt/util/misc.rb', line 1300

# Turns an underscore_separated identifier into readable text.
#
# value   - String such as "my_TSV_file".
# options - Hash whose :format selects the output style; an empty hash
#           means :sentence.
#             :sentence - "My TSV file"  (first word capitalized)
#             :allcaps  - "My Tsv File"  (every word capitalized)
#             :class    - "MyTsvFile"    (capitalized words, no spaces)
#             :nocaps   - "my TSV file"
#
# Words containing a letter followed by an upper-case letter are treated as
# acronyms: they are not down-cased, nor capitalized by :sentence.
#
# Returns the formatted String, or nil for an unknown format. Neither
# +value+ nor +options+ is modified (an empty options hash used to get a
# :format entry written into it), and an empty +value+ yields "".
def self.humanize(value, options = {})
  format = options.empty? ? :sentence : options[:format]
  acronym = /[a-zA-Z][A-Z]/

  words = value.split('_').collect do |word|
    word =~ acronym ? word : word.downcase
  end

  case format
  when :allcaps
    words.collect{|word| word.capitalize }.join " "
  when :class
    words.collect{|word| word.capitalize }.join ""
  when :sentence
    first = words.first
    words[0] = first.capitalize unless first.nil? or first =~ acronym
    words.join " "
  when :nocaps
    words.join " "
  end
end

.in_dir(dir) ⇒ Object

WARN: probably not thread safe...



946
947
948
949
950
951
952
953
954
955
956
957
# File 'lib/rbbt/util/misc.rb', line 946

# Runs the given block with +dir+ as the working directory, creating the
# directory first when it does not exist, and returns the block's value.
#
# The previous working directory is restored afterwards, even when the
# block raises. Changing directory affects the whole process.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def self.in_dir(dir)
  old_pwd = FileUtils.pwd
  res = nil
  begin
    FileUtils.mkdir_p dir unless File.exist? dir
    FileUtils.cd dir
    res = yield
  ensure
    FileUtils.cd old_pwd
  end
  res
end

.insist(times = 3, sleep = nil) ⇒ Object



770
771
772
773
774
775
776
777
778
779
780
781
# File 'lib/rbbt/util/misc.rb', line 770

# Runs the given block, retrying it when it raises a StandardError.
#
# times - maximum number of attempts (default 3).
# sleep - seconds to wait after each failure, or nil for no wait.
#
# Every failure is reported through Log.warn. Returns the block's value, or
# re-raises the last exception once +times+ attempts have failed.
def self.insist(times = 3, sleep = nil)
  attempts = 0
  begin
    yield
  rescue => error
    Log.warn("Insisting after exception: #{error.message}")
    # The parameter shadows Kernel#sleep, hence the explicit receiver.
    Kernel.sleep(sleep) if sleep
    attempts += 1
    retry if attempts < times
    raise error
  end
end

.intersect_sorted_arrays(a1, a2) ⇒ Object



559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
# File 'lib/rbbt/util/misc.rb', line 559

# Returns the elements present in both +a1+ and +a2+, which must be sorted
# in ascending <=> order.
#
# The arrays are walked with two cursors and left untouched (they used to be
# consumed with shift). The walk stops at the end of either array or at the
# first nil element. Raises ArgumentError when two elements cannot be
# compared, which used to loop forever.
def self.intersect_sorted_arrays(a1, a2)
  i1 = i2 = 0
  intersect = []

  while i1 < a1.length and i2 < a2.length
    e1, e2 = a1[i1], a2[i2]
    break if e1.nil? or e2.nil?

    case e1 <=> e2
    when 0
      intersect << e1
      i1 += 1
      i2 += 1
    when -1
      i1 += 1
    when 1
      i2 += 1
    else
      raise ArgumentError, "Cannot compare #{e1.inspect} with #{e2.inspect}"
    end
  end

  intersect
end

.is_filename?(string) ⇒ Boolean

Returns:

  • (Boolean)


529
530
531
532
533
# File 'lib/rbbt/util/misc.rb', line 529

# Tells whether +string+ refers to a file.
#
# Objects that respond to :exists are accepted without further checks; a
# String qualifies when it is shorter than 265 characters and names an
# existing file system entry.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def self.is_filename?(string)
  return true if string.respond_to? :exists
  return true if String === string and string.length < 265 and File.exist? string
  return false
end

.IUPAC_to_base(iupac) ⇒ Object



525
526
527
# File 'lib/rbbt/util/misc.rb', line 525

# Looks up the list of bases that the IUPAC code +iupac+ stands for in
# IUPAC2BASE. Returns nil for an unknown code.
def self.IUPAC_to_base(iupac)
  IUPAC2BASE.fetch(iupac, nil)
end

.lock(file, *args) ⇒ Object



871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
# File 'lib/rbbt/util/misc.rb', line 871

# Runs the given block while holding a lock file named "<file>.lock".
#
# file - path to protect; when nil the block runs without any locking.
# args - extra arguments passed on to the block after +file+.
#
# Before locking, an existing lock file written on this host by a process
# that no longer exists is removed; a lock file that cannot be checked is
# removed as well, after logging a warning.
#
# Returns the value of the block.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def self.lock(file, *args)
  return yield file, *args if file.nil?

  lock_dir = File.dirname(File.expand_path(file))
  FileUtils.mkdir_p lock_dir unless File.exist? lock_dir

  res = nil

  lockfile = Lockfile.new(File.expand_path(file + '.lock'))

  begin
    if File.exist? lockfile and
      Misc.hostname == (info = YAML.load_file(lockfile))["host"] and 
      info["pid"] and not Misc.pid_exists?(info["pid"])

      Log.info("Removing lockfile: #{lockfile}. This pid #{Process.pid}. Content: #{info.inspect}")
      FileUtils.rm lockfile 
    end
  rescue
    Log.warn("Error checking lockfile #{lockfile}: #{$!.message}. Removing. Content: #{begin Open.read(lockfile) rescue "Could not open file" end}")
    FileUtils.rm lockfile if File.exist? lockfile 
  end

  lockfile.lock do 
    res = yield file, *args
  end

  res
end

.lock_in_repo(repo, key, *args) ⇒ Object



901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
# File 'lib/rbbt/util/misc.rb', line 901

# Runs the given block while holding a lock entry stored in +repo+ under the
# key "lock-<key>".
#
# repo - key/value store responding to [], []=, include?, out and delete.
# key  - String identifying what is being locked.
# args - extra arguments passed on to the block after the lock key.
#
# When +repo+ or +key+ is nil the block runs without locking and receives
# +key+ (this path used to reference an undefined variable). A lock left on
# this host by a process that no longer exists is removed first; the check
# accepts both the keys this method writes (:hostname, :pid) and the
# "host"/"pid" keys it used to look for. The method then waits, polling once
# per second, until the entry is free. The entry is removed when the block
# finishes, even if it raises.
#
# Returns the value of the block.
def self.lock_in_repo(repo, key, *args)
  return yield key, *args if repo.nil? or key.nil?

  lock_key = "lock-" << key

  begin
    if repo[lock_key]
      info = LOCK_REPO_SERIALIZER.load(repo[lock_key])
      host = info["host"] || info[:hostname]
      pid = info["pid"] || info[:pid]

      if Misc.hostname == host and pid and not Misc.pid_exists?(pid)
        Log.info("Removing lockfile: #{lock_key}. This pid #{Process.pid}. Content: #{info.inspect}")
        repo.out lock_key
      end
    end
  rescue
    Log.warn("Error checking lockfile #{lock_key}: #{$!.message}. Removing. Content: #{begin repo[lock_key] rescue "Could not open file" end}")
    repo.out lock_key if repo.include? lock_key
  end

  while repo[lock_key]
    sleep 1
  end

  repo[lock_key] = LOCK_REPO_SERIALIZER.dump({:hostname => Misc.hostname, :pid => Process.pid})

  begin
    yield lock_key, *args
  ensure
    repo.delete lock_key
  end
end

.log2(x) ⇒ Object



160
161
162
# File 'lib/rbbt/util/misc.rb', line 160

# Base-2 logarithm of +x+, computed as the natural logarithm scaled by
# Log2Multiplier.
def self.log2(x)
  natural = Math.log(x)
  Log2Multiplier * natural
end

.max(list) ⇒ Object



281
282
283
284
285
286
287
288
# File 'lib/rbbt/util/misc.rb', line 281

# Returns the largest non-nil element of +list+ according to >, or nil when
# there is none. On ties the first of the equal elements is kept.
def self.max(list)
  best = nil
  list.each do |candidate|
    next if candidate.nil?

    if best.nil? or candidate > best
      best = candidate
    end
  end
  best
end

.mean(list) ⇒ Object



317
318
319
# File 'lib/rbbt/util/misc.rb', line 317

# Average of +list+: the result of sum(list) divided by the number of
# non-nil elements.
def self.mean(list)
  total = sum(list)
  count = list.compact.length
  total / count
end

.memprof ⇒ Object



743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
# File 'lib/rbbt/util/misc.rb', line 743

# Runs the given block under Memprof and prints the collected statistics.
#
# Requires the memprof gem. If the block raises, "Profiling aborted" is
# printed and the exception is re-raised; the profiler is stopped and its
# statistics printed in either case. Returns the block's value.
def self.memprof
  require 'memprof'
  Memprof.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    Memprof.stop
    print Memprof.stats
  end

  outcome
end

.merge_sorted_arrays(a1, a2) ⇒ Object



578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
# File 'lib/rbbt/util/misc.rb', line 578

# Merges two arrays sorted in ascending <=> order into one sorted array.
# An element present in both arrays at the same point of the merge is
# emitted once.
#
# The arrays are walked with two cursors and left untouched (they used to be
# consumed with shift). Raises ArgumentError when two elements cannot be
# compared, which used to loop forever.
def self.merge_sorted_arrays(a1, a2)
  i1 = i2 = 0
  merged = []

  while i1 < a1.length and i2 < a2.length
    e1, e2 = a1[i1], a2[i2]

    case e1 <=> e2
    when 0
      merged << e1
      i1 += 1
      i2 += 1
    when -1
      merged << e1
      i1 += 1
    when 1
      merged << e2
      i2 += 1
    else
      raise ArgumentError, "Cannot compare #{e1.inspect} with #{e2.inspect}"
    end
  end

  # At most one of the arrays still has elements left.
  merged.concat a1[i1..-1] if i1 < a1.length
  merged.concat a2[i2..-1] if i2 < a2.length
  merged
end

.ordered_divide(array, num) ⇒ Object

Divides the array into consecutive chunks of num elements each, preserving the original order; the last chunk may be shorter.



1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
# File 'lib/rbbt/util/misc.rb', line 1271

# Splits +array+ into consecutive chunks of +num+ elements, preserving the
# original order; the last chunk may be shorter.
#
# Returns an Array of chunks ([] for an empty input). Raises ArgumentError
# when +num+ is smaller than 1 and there is something to split, a case that
# used to loop forever.
def self.ordered_divide(array, num)
  total = array.length
  return [] if total == 0

  raise ArgumentError, "Chunk size must be at least 1 (got #{num.inspect})" unless num >= 1

  chunks = []
  start = 0
  while start < total
    chunks << array[start, num]
    start += num
  end
  chunks
end

.path_relative_to(basedir, path) ⇒ Object



853
854
855
856
857
858
859
860
861
862
863
864
865
# File 'lib/rbbt/util/misc.rb', line 853

# Expresses +path+ relative to +basedir+.
#
# Both arguments are expanded with File.expand_path first. Returns "." when
# they are the same location, the remainder of +path+ when it lies below
# +basedir+, and nil otherwise.
#
# The comparison is anchored at the start of +path+: previously a base
# directory that merely appeared somewhere inside the path was accepted, and
# the root directory was never recognized as a base.
def self.path_relative_to(basedir, path)
  path = File.expand_path(path)
  basedir = File.expand_path(basedir)

  return "." if path == basedir

  prefix = basedir.end_with?("/") ? basedir : basedir + "/"
  return nil unless path.start_with?(prefix)

  path[prefix.length..-1]
end

.pid_exists?(pid) ⇒ Boolean

Returns:

  • (Boolean)


58
59
60
61
62
63
64
65
66
# File 'lib/rbbt/util/misc.rb', line 58

# Tells whether a process with the given +pid+ exists, by asking for its
# process group with Process.getpgid. +pid+ is converted with to_i.
#
# Returns false for a nil pid or when the lookup raises Errno::ESRCH, true
# otherwise.
def self.pid_exists?(pid)
  return false if pid.nil?

  Process.getpgid(pid.to_i)
  true
rescue Errno::ESRCH
  false
end

.positional2hash(keys, *values) ⇒ Object



338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/rbbt/util/misc.rb', line 338

# Pairs the names in +keys+ with the positional +values+ into a Hash.
#
# When the last value is a Hash it is treated as a set of named extras:
# positional entries that are nil or empty Strings are dropped, the extras
# fill in whatever is still missing, and finally every entry whose key is
# not listed in +keys+ (as given, or converted between Symbol and String) is
# removed.
def self.positional2hash(keys, *values)
  return Misc.zip2hash(keys, values) unless Hash === values.last

  extra = values.pop
  inputs = Misc.zip2hash(keys, values)
  inputs.delete_if{|_key, value| value.nil? or (String === value and value.empty?)}
  inputs = Misc.add_defaults inputs, extra

  inputs.delete_if do |key, _value|
    next false if keys.include?(key)

    alternative = Symbol === key ? key.to_s : key.to_sym
    not keys.include?(alternative)
  end

  inputs
end

.prepare_entity(entity, field, options = {}) ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/rbbt/util/misc.rb', line 164

# Sets up +entity+ as an Entity of the type associated with +field+, when
# the Entity subsystem is loaded and knows that field.
#
# entity  - String or Array to annotate; anything else is returned as is.
# field   - field name, looked up in Entity.formats.
# options - extra annotation parameters; the :dup_array entry is removed
#           from the hash that is passed in.
#
# Returns the (possibly annotated) entity.
def self.prepare_entity(entity, field, options = {})
  return entity unless defined? Entity
  return entity unless String === entity or Array === entity
  options ||= {}

  dup_array = options.delete :dup_array

  # NOTE(review): `or` and `and` share one precedence level and associate
  # left, so this reads as (A or B) and C - confirm that is the intent.
  if Annotated === field or Entity.respond_to?(:formats) and Entity.formats.include? field
    params = options.dup

    # Accept the format under a String key, and fall back to the field name
    # when no non-empty :format was given.
    params[:format] ||= params.delete "format"
    params.merge!(:format => field) unless params.include?(:format) and not ((f = params[:format]).nil? or (String === f and f.empty?))

    mod = Entity === field ? field : Entity.formats[field]
    # Frozen entities are copied before setup; arrays are copied element by
    # element only when :dup_array was requested.
    entity = mod.setup(
      ((entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity) ),
      params
    ) 
  end

  entity
end

.process_options(hash, *keys) ⇒ Object



1088
1089
1090
1091
1092
1093
1094
# File 'lib/rbbt/util/misc.rb', line 1088

# Removes the given option +keys+ from +hash+ and returns their values.
#
# Each key is looked up as a Symbol first and as a String otherwise. With a
# single key the bare value is returned; with any other number of keys an
# Array of values in the same order is returned. Missing options yield nil.
def self.process_options(hash, *keys)
  values = keys.collect do |key|
    symbol = key.to_sym
    hash.include?(symbol) ? hash.delete(symbol) : hash.delete(key.to_s)
  end

  keys.length == 1 ? values.first : values
end

.process_to_hash(list) ⇒ Object



654
655
656
657
# File 'lib/rbbt/util/misc.rb', line 654

# Yields +list+ to the given block and pairs each element of +list+ with the
# corresponding element of the block's result, using zip2hash.
def self.process_to_hash(list)
  zip2hash(list, yield(list))
end

.profile(options = {}) ⇒ Object



726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
# File 'lib/rbbt/util/misc.rb', line 726

# Profiles the given block with ruby-prof and prints a flat report to
# STDOUT.
#
# options - passed on to the printer's print method.
#
# If the block raises, "Profiling aborted" is printed and the exception is
# re-raised; the report is printed in either case. Returns the block's
# value.
def self.profile(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    RubyProf::FlatPrinter.new(report).print(STDOUT, options)
  end

  outcome
end

.profile_graph(options = {}) ⇒ Object



708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
# File 'lib/rbbt/util/misc.rb', line 708

# Profiles the given block with ruby-prof and prints a call-graph report to
# STDOUT.
#
# options - passed on to the printer's print method.
#
# If the block raises, "Profiling aborted" is printed and the exception is
# re-raised; the report is printed in either case. Returns the block's
# value.
def self.profile_graph(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    outcome = yield
  rescue Exception
    puts "Profiling aborted"
    raise
  ensure
    report = RubyProf.stop
    #report.eliminate_methods!([/annotated_array_clean_/])
    RubyProf::GraphPrinter.new(report).print(STDOUT, options)
  end

  outcome
end

.profile_html(options = {}) ⇒ Object



687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
# File 'lib/rbbt/util/misc.rb', line 687

# Profiles the given block with ruby-prof, writes the multi-printer report
# into a temporary directory and opens that directory with firefox.
#
# options - accepted for symmetry with the other profile methods; not used.
#
# If the block raises, "Profiling aborted" is printed and the exception is
# re-raised; the report is produced in either case. Returns the block's
# value.
#
# Uses File.exist?; the File.exists? alias was removed in Ruby 3.2.
def self.profile_html(options = {})
  require 'ruby-prof'
  RubyProf.start
  begin
    res = yield
  rescue Exception
    puts "Profiling aborted"
    raise $!
  ensure
    result = RubyProf.stop
    printer = RubyProf::MultiPrinter.new(result)
    TmpFile.with_file do |dir|
      FileUtils.mkdir_p dir unless File.exist? dir
      printer.print(:path => dir, :profile => 'profile')
      CMD.cmd("firefox  -no-remote  '#{ dir }'")
    end
  end

  res
end

.proportions(array) ⇒ Object



380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
# File 'lib/rbbt/util/misc.rb', line 380

# Computes the fraction of +array+ that each distinct element accounts for.
#
# Returns a Hash (default value 0) mapping element => proportion. Its to_s
# is overridden to list one "proportion<TAB>element" line per entry, sorted
# by ascending proportion and then by element. Proportions are printed with
# three decimals; they used to be printed with "%3d", which truncated every
# fraction to 0 or 1.
def self.proportions(array)
  total = array.length

  proportions = Hash.new 0

  array.each do |e|
    proportions[e] += 1.0 / total
  end

  def proportions.to_s
    sort{|a,b| a[1] == b[1] ? a[0] <=> b[0] : a[1] <=> b[1]}.collect{|k,c| "%.3f\t%s" % [c, k]} * "\n"
  end

  proportions
end

.pull_keys(hash, prefix) ⇒ Object



1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
# File 'lib/rbbt/util/misc.rb', line 1096

# Moves the entries of +hash+ that belong to +prefix+ into a new Hash.
#
# An entry belongs to the prefix when its key is "<prefix>_<name>", in which
# case it is stored under <name> (as a String or Symbol, like the original
# key), or when its key is the prefix itself, in which case it is stored
# under the unchanged key. Matching entries are removed from +hash+.
#
# The prefix is matched literally and only at the start of the key: earlier
# it was interpolated unquoted and unanchored, so with prefix "plot" a key
# such as :subplot_x was taken as well.
#
# Returns the new Hash.
def self.pull_keys(hash, prefix)
  new = {}
  prefix_re = /\A#{Regexp.quote prefix.to_s}_(.*)/

  hash.keys.each do |key|
    match = prefix_re.match(key.to_s)
    if match
      case key
      when String
        new[match[1]] = hash.delete key
      when Symbol
        new[match[1].to_sym] = hash.delete key
      end
    elsif key.to_s == prefix.to_s
      new[key] = hash.delete key
    end
  end

  new
end

.random_sample_in_range(total, size) ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/rbbt/util/misc.rb', line 118

# Draws +size+ distinct random integers from 0...total.
#
# For large samples (more than a tenth of the range) the positions are drawn
# from an explicit list of candidates, removing each pick; for small samples
# random positions are drawn and repeated ones discarded.
#
# A +size+ larger than +total+ is reduced to +total+, since no more distinct
# positions exist (it used to produce nil entries or an IndexError).
#
# Returns a Set of Integers.
def self.random_sample_in_range(total, size)
  size = total if size > total
  p = Set.new

  if size > total / 10
    template = (0..total - 1).to_a
    size.times do |i|
      pos = (rand * (total - i)).floor
      # Remove the pick in constant time: the last candidate takes its
      # place unless the pick is the last candidate itself.
      last = template.pop
      if pos == template.length
        v = last
      else
        v = template[pos]
        template[pos] = last
      end
      p << v
    end
  else
    size.times do
      pos = (rand * total).floor
      pos = (rand * total).floor while p.include? pos
      p << pos
    end
  end
  p
end

.remove_long_items(obj) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/rbbt/util/misc.rb', line 245

# Returns a copy of +obj+ in which oversized collections and strings are
# truncated, recursing into arrays and hash values.
#
# * TSV objects are replaced by the list of their fields followed by their
#   sorted keys (then processed as an array).
# * Arrays longer than ARRAY_MAX_LENGTH keep their first ARRAY_MAX_LENGTH - 1
#   elements plus a "TRUNCATED at ..." marker.
# * Hashes with more than ARRAY_MAX_LENGTH entries become an ARRAY of
#   [key, value] pairs, truncated the same way, ending in a
#   ["TRUNCATED", "at ..."] pair.
# * Strings longer than STRING_MAX_LENGTH are cut and a marker is appended.
# * Anything else is returned as is.
#
# The argument itself is never modified.
def self.remove_long_items(obj)
  case obj
  when TSV
    remove_long_items((obj.all_fields || []) + obj.keys.sort)
  when Array
    if obj.length > ARRAY_MAX_LENGTH
      kept = obj[0..ARRAY_MAX_LENGTH-2]
      remove_long_items(kept << "TRUNCATED at #{ ARRAY_MAX_LENGTH } (#{obj.length})")
    else
      obj.collect { |e| remove_long_items(e) }
    end
  when Hash
    if obj.length > ARRAY_MAX_LENGTH
      # to_a gives the [key, value] pairs directly; a block-less collect
      # returns an Enumerator, which cannot be sliced.
      pairs = obj.to_a[0..ARRAY_MAX_LENGTH-2]
      remove_long_items(pairs << ["TRUNCATED", "at #{ ARRAY_MAX_LENGTH } (#{obj.length})"])
    else
      copy = {}
      obj.each { |k, v| copy[k] = remove_long_items(v) }
      copy
    end
  when String
    if obj.length > STRING_MAX_LENGTH
      obj[0..STRING_MAX_LENGTH-1] << " TRUNCATED at #{STRING_MAX_LENGTH} (#{obj.length})"
    else
      obj
    end
  else
    obj
  end
end

.reset_do_once ⇒ Object



766
767
768
# File 'lib/rbbt/util/misc.rb', line 766

# Sets the global flag $__did_once back to false.
# NOTE(review): presumably this re-arms a run-once helper defined elsewhere
# in this file — confirm against the code that reads $__did_once.
def self.reset_do_once
  $__did_once = false
end

.sample(ary, size, replacement = false) ⇒ Object



149
150
151
152
153
154
155
156
157
# File 'lib/rbbt/util/misc.rb', line 149

# Picks +size+ random elements from +ary+.
#
# When +ary+ implements #sample that method is used directly. Otherwise
# random positions are drawn with random_sample_in_range and the elements at
# those positions are fetched with #values_at.
#
# NOTE: +replacement+ is accepted but not used by either path.
def self.sample(ary, size, replacement = false)
  return ary.sample(size) if ary.respond_to?(:sample)

  positions = random_sample_in_range(ary.length, size)
  ary.values_at(*positions)
end

.sanitize_filename(filename, length = 254) ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/rbbt/util/misc.rb', line 190

# Shortens +filename+ so that it fits in +length+ characters.
#
# Names that already fit are returned unchanged. Longer names are cut and
# given a suffix of the form "--<original length>@<length>_<first five
# characters of Misc.digest(filename)>" followed by the original extension
# (a final dot plus 2 to 9 characters), if there is one.
def self.sanitize_filename(filename, length = 254)
  return filename unless filename.length > length

  extension = filename[/(\..{2,9})$/, 1] || ''
  post_fix = "--#{filename.length}@#{length}_#{Misc.digest(filename)[0..4]}#{extension}"

  # Keep as much of the head as leaves room for the suffix.
  head = filename[0..(length - post_fix.length - 1)]
  head << post_fix
end

.sd(list) ⇒ Object



321
322
323
324
325
# File 'lib/rbbt/util/misc.rb', line 321

# Sample standard deviation of +list+, ignoring nil entries:
#
#   sqrt( sum((x - mean)^2) / (n - 1) )
#
# Returns nil when fewer than three non-nil values are available.
def self.sd(list)
  values = list.compact
  return nil if values.length < 3

  avg = mean(values)
  squared_error = values.inject(0.0) do |acc, e|
    d = e - avg
    acc + d * d
  end

  # The division by n - 1 belongs inside the square root.
  Math.sqrt(squared_error / (values.length - 1))
end

.send_email(from, to, subject, message, options = {}) ⇒ Object



351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/rbbt/util/misc.rb', line 351

# Sends a plain-text e-mail over SMTP.
#
# from, to - sender and recipient addresses.
# subject  - text of the Subject header.
# message  - body of the message.
# options  - connection settings, with these defaults:
#            :from_alias => nil, :to_alias => nil (names shown before the
#            addresses), :server => 'localhost', :port => 25,
#            :user => nil, :pass => nil, :auth => :login.
def self.send_email(from, to, subject, message, options = {})
  IndiferentHash.setup(options)

  defaults = {
    :from_alias => nil, :to_alias => nil,
    :server => 'localhost', :port => 25,
    :user => nil, :pass => nil, :auth => :login
  }
  options = Misc.add_defaults(options, defaults)

  server, port, user, pass, from_alias, to_alias, auth =
    Misc.process_options(options, :server, :port, :user, :pass, :from_alias, :to_alias, :auth)

  # Headers, a blank separator line, then the body.
  msg = "From: #{from_alias} <#{from}>\n" \
        "To: #{to_alias} <#{to}>\n" \
        "Subject: #{subject}\n" \
        "\n" \
        "#{message}\n"

  # The server name is passed as both the first and the third argument.
  Net::SMTP.start(server, port, server, user, pass, auth) do |smtp|
    smtp.send_message(msg, from, to)
  end
end

.sensiblewrite(path, content) ⇒ Object



983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
# File 'lib/rbbt/util/misc.rb', line 983

# Writes +content+ to +path+ unless the file already exists.
#
# The work happens inside Misc.lock on "<path>.sensible_write". Data is first
# written to "<path>.tmp" and then moved into place, so a partially written
# file never appears under +path+.
#
# content - a String (written as is), an IO or StringIO (copied line by
#           line), or anything else (an empty file is created).
#
# On any failure both the temporary and the target file are removed and the
# error is re-raised; an Interrupt is re-raised as a RuntimeError with the
# message "Interrupted (Ctrl-c)".
def self.sensiblewrite(path, content)
  Misc.lock path + '.sensible_write' do
    unless File.exist?(path)
      tmp_path = path + '.tmp'
      begin
        File.open(tmp_path, 'w') do |f|
          case content
          when String
            f.write content
          when IO, StringIO
            while (line = content.gets)
              f.write line
            end
          end
        end
        FileUtils.mv tmp_path, path
      rescue Interrupt
        FileUtils.rm_f [tmp_path, path]
        raise "Interrupted (Ctrl-c)"
      rescue Exception
        # Deliberately broad: clean up after anything, then re-raise it.
        FileUtils.rm_f [tmp_path, path]
        raise
      end
    end
  end
end

.snake_case(string) ⇒ Object



1288
1289
1290
1291
1292
1293
1294
1295
1296
# File 'lib/rbbt/util/misc.rb', line 1288

# Converts a CamelCase or spaced name into snake_case.
#
# Underscores are inserted between an acronym and a following capitalised
# word and between a lowercase letter and an uppercase one; whitespace becomes
# "_" and any remaining non-word character is dropped. Each "_"-separated
# part is then downcased unless it contains a run of two or more capitals
# (so acronyms such as "HTTP" are kept).
#
# Accepts a String or Symbol; returns a String, or nil for nil.
def self.snake_case(string)
  return nil if string.nil?

  text = Symbol === string ? string.to_s : string
  text = text.gsub(/([A-Z]{2,})([A-Z][a-z])/, '\1_\2')
  text = text.gsub(/([a-z])([A-Z])/, '\1_\2')
  text = text.gsub(/\s/, '_').gsub(/[^\w_]/, '')

  parts = text.split("_").collect do |part|
    part =~ /[A-Z]{2,}/ ? part : part.downcase
  end
  parts.join("_")
end

.sorted_array_hits(a1, a2) ⇒ Object



535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
# File 'lib/rbbt/util/misc.rb', line 535

# Finds which elements of the sorted array +a1+ also occur in the sorted
# array +a2+, walking both arrays in step.
#
# Returns the positions (indices into +a1+) of the matching elements. After
# a match both arrays advance, so each element of +a2+ matches at most once.
# Scanning stops at the end of either array or at the first nil element.
# Neither argument is modified.
#
# Raises ArgumentError if two elements cannot be compared with <=>.
def self.sorted_array_hits(a1, a2)
  hits = []
  i = j = 0

  loop do
    e1, e2 = a1[i], a2[j]
    break if e1.nil? || e2.nil?

    cmp = e1 <=> e2
    if cmp.nil?
      raise ArgumentError, "comparison of #{e1.inspect} with #{e2.inspect} failed"
    end

    if cmp == 0
      hits << i
      i += 1
      j += 1
    elsif cmp < 0
      i += 1 # e1 is behind: advance in a1
    else
      j += 1 # e2 is behind: advance in a2
    end
  end

  hits
end

.string2const(string) ⇒ Object



1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
# File 'lib/rbbt/util/misc.rb', line 1116

# Resolves a constant from its name, e.g. "File::Stat" => File::Stat.
#
# The name is split on "::" and each part is looked up with const_get,
# starting from Kernel. Returns nil for nil; a NameError from const_get is
# not rescued.
def self.string2const(string)
  return nil if string.nil?

  string.to_s.split('::').inject(Kernel) do |scope, name|
    scope.const_get name
  end
end

.string2hash(string) ⇒ Object



1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
# File 'lib/rbbt/util/misc.rb', line 1173

# Parses an option string such as "a=1#:b=:sym#flag" into a Hash.
#
# Entries are separated by "#"; each entry is "key=value" (split at the
# first "=") or a bare key. A key starting with ":" becomes a Symbol,
# otherwise it stays a String.
#
# Values are converted by the first rule that applies:
#   (no value)          => true
#   :name               => Symbol
#   /source/            => Regexp
#   'text' or "text"    => the text without the quotes
#   digits              => Integer
#   digits.digits       => Float
#   true / false        => true / false
#   anything else       => the raw String
#
# Values are never evaluated as Ruby code.
def self.string2hash(string)
  options = {}

  string.split('#').each do |str|
    key, _sep, value = str.partition "="

    key = key[1..-1].to_sym if key[0] == ":"

    options[key] =
      if value.empty?
        true
      elsif value[0] == ":"
        value[1..-1].to_sym
      elsif value[0] == "/" && value[-1] == "/"
        Regexp.new(value[1..-2])
      elsif value =~ /^['"].*['"]$/
        value[1..-2]
      elsif value =~ /^\d+$/
        value.to_i
      elsif value =~ /^\d*\.\d+$/
        value.to_f
      elsif value == "true"
        true
      elsif value == "false"
        false
      else
        value
      end
  end

  options
end

.string2hash_old(string) ⇒ Object



1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
# File 'lib/rbbt/util/misc.rb', line 1127

# Legacy parser that turns an option string ("#"-separated entries, each
# either "option=value" or a bare option) into a Hash.
#
# NOTE(security): values that are not recognised may be passed to +eval+
# (see below); do not feed this method untrusted input.
def self.string2hash_old(string)

  options = {}
  string.split(/#/).each do |str|
    # Both groups are greedy, so the split happens at the LAST "=" of the
    # entry. An entry without "=" is a bare flag.
    if str.match(/(.*)=(.*)/)
      option, value = $1, $2
    else
      option, value = str, true
    end

    # A leading ":" marks a Symbol, for the option name as well as the value.
    option = option.sub(":",'').to_sym if option.chars.first == ':'
    value  = value.sub(":",'').to_sym if String === value and value.chars.first == ':'

    if value == true
      # Bare flag: true, unless the name starts with "!" (then false). The
      # "!" is kept as part of the key.
      options[option] = option.to_s.chars.first != '!' 
    else
      # The value is resolved inside a separate thread that sets $SAFE to 0;
      # the thread's result becomes the option value.
      options[option] = Thread.start do
        $SAFE = 0;
        case 
        when value =~ /^(?:true|T)$/i
          true
        when value =~ /^(?:false|F)$/i
          false
        when Symbol === value
          value
        when (String === value and value =~ /^\/(.*)\/$/)
          # "/source/" becomes a Regexp built from the text between slashes.
          Regexp.new /#{$1}/
        else
          # Fallback chain: a constant of that name, then the result of
          # evaluating the text, then the raw text itself.
          begin
            Kernel.const_get value
          rescue
            begin  
              # `defined? value` is always truthy for this local, so any
              # value containing a lowercase letter skips eval and is
              # returned as is by the rescue below.
              raise if value =~ /[a-z]/ and defined? value
              eval(value) 
            rescue Exception
              value 
            end
          end
        end
      end.value
    end
  end

  options
end

.sum(list) ⇒ Object



313
314
315
# File 'lib/rbbt/util/misc.rb', line 313

# Adds up the non-nil elements of +list+.
# Always returns a Float (0.0 for an empty list).
def self.sum(list)
  list.compact.inject(0.0) { |total, value| total + value }
end

.to_utf8(string) ⇒ Object



959
960
961
# File 'lib/rbbt/util/misc.rb', line 959

# Returns a UTF-8 copy of +string+ in which invalid or unconvertible
# characters have been replaced by "?".
#
# The text is first converted to UTF-16BE (where the replacement takes place)
# and then back to UTF-8.
def self.to_utf8(string)
  utf16 = string.encode("UTF-16BE", :invalid => :replace, :undef => :replace, :replace => "?")
  utf16.encode('UTF-8')
end

.total_length(ranges) ⇒ Object



113
114
115
# File 'lib/rbbt/util/misc.rb', line 113

# Total number of positions covered by +ranges+.
#
# The ranges are first passed through Misc.collapse_ranges; each resulting
# range then contributes (end - begin + 1) to the total.
def self.total_length(ranges)
  Misc.collapse_ranges(ranges).inject(0) do |sum, span|
    sum + (span.end - span.begin + 1)
  end
end

.try3times(&block) ⇒ Object



783
784
785
# File 'lib/rbbt/util/misc.rb', line 783

# Runs the given block through +insist+ with a count of 3 and returns
# whatever +insist+ returns.
def self.try3times(&block)
  attempts = 3
  insist(attempts, &block)
end

.zip2hash(list1, list2) ⇒ Object



646
647
648
649
650
651
652
# File 'lib/rbbt/util/misc.rb', line 646

# Builds a Hash that maps each element of +list1+ to the element of +list2+
# at the same position.
#
# Positions missing from +list2+ map to whatever list2[i] returns there (nil
# for an Array); when +list1+ repeats a key, the last occurrence wins.
def self.zip2hash(list1, list2)
  list1.each_with_index.inject({}) do |mapping, (key, index)|
    mapping[key] = list2[index]
    mapping
  end
end

.zip_fields(array) ⇒ Object



1283
1284
1285
1286
# File 'lib/rbbt/util/misc.rb', line 1283

# Zips the lists in +array+ together: the i-th element of the result holds
# the i-th element of every list.
#
# The length of the result is that of the first list; shorter lists are
# padded with nil. Returns [] for an empty +array+.
def self.zip_fields(array)
  return [] if array.empty?

  head, *tail = array
  head.zip(*tail)
end