Class: Phylip

Inherits:
Object
  • Object
show all
Defined in:
lib/rphylip.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(phylipfile) ⇒ Phylip

Returns a new instance of Phylip.



19
20
21
22
23
24
25
26
27
# File 'lib/rphylip.rb', line 19

# Loads a sequential PHYLIP alignment from +phylipfile+.
#
# The first line is parsed as the header "<numtaxa> <seqlen>". Every
# remaining line that is not whitespace-only is kept verbatim (including its
# trailing newline) as one record in @seqs.
#
# @param phylipfile [String] path of the PHYLIP file to read
# @raise [RuntimeError] if the file is missing, is empty, or holds a number
#   of records different from the taxa count announced in the header
def initialize(phylipfile)
  # File.exists? was deprecated and then removed (Ruby 3.2); File.exist? works everywhere.
  raise "File #{phylipfile} does not exist" unless File.exist?(phylipfile)
  @filename = phylipfile
  # File.readlines closes the handle itself; File.open(...).readlines left it open.
  lines = File.readlines(phylipfile)
  header = lines.shift
  raise "File #{phylipfile} is empty" if header.nil?
  @numtaxa, @seqlen = header.split.map { |w| w.to_i }
  @seqs = lines.reject { |l| l =~ /^\s+$/ }
  raise "wrong number of seqs,parsed #{@seqs.size} expected ntaxa #{@numtaxa}" unless @seqs.size == @numtaxa
end

Instance Attribute Details

#numtaxa ⇒ Object (readonly)

Returns the value of attribute numtaxa.



18
19
20
# File 'lib/rphylip.rb', line 18

# Reader for @numtaxa, the taxa count that #initialize parses from the first
# field of the header line.
def numtaxa
  @numtaxa
end

#seqlen ⇒ Object (readonly)

Returns the value of attribute seqlen.



18
19
20
# File 'lib/rphylip.rb', line 18

# Reader for @seqlen, the sequence length that #initialize parses from the
# second field of the header line.
def seqlen
  @seqlen
end

#seqs ⇒ Object (readonly)

Returns the value of attribute seqs.



18
19
20
# File 'lib/rphylip.rb', line 18

# Reader for @seqs, the array of raw record lines held by this alignment.
# The array itself is returned (not a copy).
def seqs
  @seqs
end

Instance Method Details

#expand_with(phylipfile) ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
# File 'lib/rphylip.rb', line 116

# Appends every record of another PHYLIP file to this alignment.
#
# The other file is loaded with Phylip.new; its records are added to @seqs
# and @numtaxa grows by the number of records added.
#
# @param phylipfile [String] path of the PHYLIP file whose records are added
# @return [Array] the records that were appended
# @raise [RuntimeError] if the other file's sequence length differs from ours
def expand_with(phylipfile)
  extra = Phylip.new(phylipfile)
  unless extra.seqlen == seqlen
    raise "different sequence lengths for new #{phylipfile}, cannot be expanded"
  end

  incoming = extra.seqs
  @seqs.concat(incoming)
  @numtaxa += incoming.size
  incoming
end

#extract_partition(from_pos, to_pos) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/rphylip.rb', line 127

# Trims every record to the columns from_pos..to_pos (1-based, inclusive),
# replaces @seqs and @seqlen with the trimmed alignment and saves it as
# "<@filename>_from<from_pos>_to<to_pos>" via #save_as.
#
# Records are rebuilt as "<name> <columns>" without a trailing newline.
#
# @param from_pos [Integer] first column to keep (1-based)
# @param to_pos [Integer] last column to keep (1-based, inclusive)
# @return whatever #save_as returns
# @raise [RuntimeError] if the range is empty, starts before column 1 or
#   ends past the known sequence length
def extract_partition(from_pos, to_pos)
  # Validate before touching any state: previously @seqlen was overwritten
  # first, so a bad range left the object with a wrong length.
  if from_pos < 1 || from_pos > to_pos || (@seqlen && to_pos > @seqlen)
    raise "invalid partition #{from_pos}-#{to_pos} for alignment of length #{@seqlen}"
  end

  columns = (from_pos - 1)..(to_pos - 1)
  # @seqs is only reassigned once every record has been trimmed successfully.
  @seqs = @seqs.map do |seq|
    name, info = seq.split
    name + " " + info[columns]
  end
  @seqlen = to_pos - from_pos + 1
  save_as(@filename + "_from#{from_pos}_to#{to_pos}")
end

#generate_base_alignment(numseqs_base) ⇒ Object



36
37
38
39
40
41
42
# File 'lib/rphylip.rb', line 36

# Saves the initial ("base") alignment built from the first +numseqs_base+
# records of @seqs as "<@filename>_initial".
#
# Records whose sequence consists of a single repeated character are left
# out. The progress line is printed before the size check.
#
# @param numseqs_base [Integer] how many leading records to consider
# @return whatever #save_seqs_as returns
# @raise [RuntimeError] if four or fewer records remain after filtering
def generate_base_alignment(numseqs_base)
  candidates = @seqs[0...numseqs_base]
  informative = candidates.select do |record|
    record.split.last.chars.uniq.size > 1
  end
  target = @filename + "_initial"
  puts "Saving base alignment as #{target} with #{informative.size} seqs"
  raise "too few seqs" if informative.size <= 4
  save_seqs_as(informative, target)
end

#generate_update(seqs, update_filename) ⇒ Object



43
44
45
46
47
48
49
# File 'lib/rphylip.rb', line 43

# Saves an update alignment made of the given records.
#
# Records whose sequence consists of a single repeated character are dropped
# first. The progress line is printed before the size check.
#
# @param seqs [Array<String>] candidate records ("<name> <sequence>")
# @param update_filename [String] path the update alignment is written to
# @return whatever #save_seqs_as returns
# @raise [RuntimeError] if four or fewer records remain after filtering
def generate_update(seqs, update_filename)
  informative = seqs.select do |record|
    record.split.last.chars.uniq.size > 1
  end
  puts "Saving update alignment as #{update_filename} with #{informative.size} seqs"
  raise "too few seqs" if informative.size <= 4
  save_seqs_as(informative, update_filename)
end

#names ⇒ Object



28
29
30
31
32
33
34
35
# File 'lib/rphylip.rb', line 28

# Lists the taxon name (first whitespace-separated token) of every record
# in @seqs, in order.
#
# @return [Array<String>] one name per record
# @raise [RuntimeError] if the number of names differs from @numtaxa
def names
  labels = @seqs.map { |record| record.split.first.strip }
  raise "unexpected #names" unless labels.size == @numtaxa
  labels
end

#remove_taxa(taxa, pruned_phylip) ⇒ Object



98
99
100
101
102
103
104
105
106
# File 'lib/rphylip.rb', line 98

# Removes every record whose name (first token) is listed in +taxa+, then
# saves the pruned alignment to +pruned_phylip+ via #save_as.
#
# @numtaxa is refreshed to the number of records that remain so that it keeps
# agreeing with @seqs.
#
# @param taxa [Array<String>] names of the taxa to drop
# @param pruned_phylip [String] path the pruned alignment is written to
# @return [nil]
# @raise [RuntimeError] if +taxa+ is nil or empty
def remove_taxa(taxa, pruned_phylip)
  # Guard first: the progress message below calls taxa.size, which used to
  # blow up with NoMethodError on nil before this check was reached.
  raise "empty list of taxa to prune" if !taxa || taxa.empty?
  puts "Original size #{@seqs.size}, after removal expect #{@seqs.size - taxa.size}"
  # Single pass over the records instead of one full scan per taxon.
  @seqs.delete_if { |l| taxa.include?(l.split.first.strip) }
  @numtaxa = @seqs.size
  save_as(pruned_phylip)
  puts "Final size #{@seqs.size} saved in #{pruned_phylip}"
end

#save_as(newfile) ⇒ Object



107
108
109
# File 'lib/rphylip.rb', line 107

# Writes the records currently held in @seqs to +newfile+ by delegating to
# #save_seqs_as.
#
# @param newfile [String] path of the file to write
# @return whatever #save_seqs_as returns
def save_as(newfile)
  save_seqs_as(@seqs, newfile)
end

#save_seqs_as(seqs, newfile) ⇒ Object



110
111
112
113
114
115
# File 'lib/rphylip.rb', line 110

# Writes +seqs+ to +newfile+ in PHYLIP layout: a header line
# "<number of records> <@seqlen>" followed by one line per record.
# An existing file at +newfile+ is overwritten.
#
# @param seqs [Array<String>] records to write
# @param newfile [String] path of the file to write
# @return [Array<String>] the +seqs+ argument
def save_seqs_as(seqs, newfile)
  File.open(newfile, "w") do |out|
    out.puts "#{seqs.size} #{@seqlen}"
    seqs.each do |record|
      out.puts record
    end
  end
end

#subdivide(numseqs_base, num_subalignments) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/rphylip.rb', line 50

# Splits the alignment into one base alignment plus +num_subalignments+
# update alignments of (nearly) equal size.
#
# The records are shuffled, the first +numseqs_base+ go to
# #generate_base_alignment and the remaining ones are cut into
# +num_subalignments+ groups; leftover records that do not fill a whole
# group are appended to the last group. Group i is passed to
# #generate_update with the name "<@filename>_sequpdate_<i>.phy".
#
# Both arguments are coerced with #to_i once, so numeric strings are accepted.
#
# @param numseqs_base [Integer, String] number of records for the base alignment
# @param num_subalignments [Integer, String] number of update alignments
# @return [Integer] number of update alignments generated
# @raise [RuntimeError] if the counts are not positive/consistent with @numtaxa
def subdivide(numseqs_base, num_subalignments)
  base_count = numseqs_base.to_i
  parts = num_subalignments.to_i
  # Reject bad input before shuffling or writing anything; parts < 1 would
  # otherwise surface later as a ZeroDivisionError.
  if base_count < 0 || parts < 1 || base_count + parts > @numtaxa
    raise "Wrong input to subdivide phylip file"
  end

  # NOTE(review): the return value is ignored, so this presumably shuffles
  # @seqs in place; confirm against fisher_yates_shuffle.
  fisher_yates_shuffle(@seqs)
  generate_base_alignment(base_count)

  # Generate pseudo-new subsets of sequences from everything after the base.
  rest = @seqs[base_count...@seqs.size]
  subset_size = rest.size / parts
  slices = rest.each_slice(subset_size).to_a
  update_sequences = slices.first(parts)
  # Slices beyond the requested count exist when rest.size is not a multiple
  # of parts; fold them into the last subset.
  slices.drop(parts).each { |leftover| update_sequences[-1] += leftover }

  update_sequences.each_with_index do |seqs, i|
    generate_update(seqs, @filename + "_sequpdate_#{i}.phy")
  end
  update_sequences.size
end

#subdivide_random(conf) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/rphylip.rb', line 76

# Splits the alignment into a base alignment followed by update alignments
# whose sizes are drawn from #pseudonormal.
#
# Reads these keys from +conf+:
# * :initial_seqs               - number of records in the base alignment
# * :min_size_update            - passed to #pseudonormal; also the smallest
#                                 remainder allowed after an update
# * :max_size_update            - passed to #pseudonormal
# * :updates_as_full_alignments - when truthy each update starts at record 0
#                                 instead of at the first not-yet-used record
#
# Update i is passed to #generate_update as "<@filename>_sequpdate_<i>.phy".
# The global random generator is reseeded with 12345 after the shuffle and
# the base alignment have been produced.
#
# @param conf [Hash] settings described above
# @return [Integer] number of update alignments generated
# @raise [RuntimeError] if :initial_seqs exceeds @numtaxa
def subdivide_random(conf)
  base_count = conf[:initial_seqs]
  raise "too many seqs" if base_count > @numtaxa

  fisher_yates_shuffle(@seqs)
  generate_base_alignment(base_count)
  srand(12345) # be deterministic

  consumed = base_count # records already handed out
  round = 0             # iteration id
  until consumed >= @seqs.size
    batch = pseudonormal(conf[:min_size_update], conf[:max_size_update])
    remaining = @seqs.size - consumed
    # Take everything that is left when the remainder would be too small
    # to form another update.
    batch = remaining if remaining - batch < conf[:min_size_update]
    first = conf[:updates_as_full_alignments] ? 0 : consumed
    generate_update(@seqs[first...consumed + batch], @filename + "_sequpdate_#{round}.phy")
    consumed += batch
    round += 1
  end
  round
end