Class: Phylip
- Inherits:
-
Object
- Object
- Phylip
- Defined in:
- lib/rphylip.rb
Instance Attribute Summary collapse
-
#numtaxa ⇒ Object
readonly
Returns the value of attribute numtaxa.
-
#seqlen ⇒ Object
readonly
Returns the value of attribute seqlen.
-
#seqs ⇒ Object
readonly
Returns the value of attribute seqs.
Instance Method Summary collapse
- #expand_with(phylipfile) ⇒ Object
- #extract_partition(from_pos, to_pos) ⇒ Object
- #generate_base_alignment(numseqs_base) ⇒ Object
- #generate_update(seqs, update_filename) ⇒ Object
-
#initialize(phylipfile) ⇒ Phylip
constructor
A new instance of Phylip.
- #names ⇒ Object
- #remove_taxa(taxa, pruned_phylip) ⇒ Object
- #save_as(newfile) ⇒ Object
- #save_seqs_as(seqs, newfile) ⇒ Object
- #subdivide(numseqs_base, num_subalignments) ⇒ Object
- #subdivide_random(conf) ⇒ Object
Constructor Details
#initialize(phylipfile) ⇒ Phylip
Returns a new instance of Phylip.
19 20 21 22 23 24 25 26 27 |
# File 'lib/rphylip.rb', line 19
# Loads an alignment from +phylipfile+.
#
# The first line is the header: its first two whitespace-separated fields are
# parsed with +to_i+ as the number of taxa and the sequence length. Every
# remaining line that is not whitespace-only is kept verbatim (including its
# line terminator) as one entry of +@seqs+.
#
# @param phylipfile [String] path of the file to read
# @raise [RuntimeError] if the file does not exist, is empty, or the number of
#   non-blank lines after the header differs from the declared number of taxa
def initialize(phylipfile)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  raise "File #{phylipfile} does not exist" unless File.exist?(phylipfile)
  @filename = phylipfile
  # File.readlines closes the handle itself (File.open without a block leaks it).
  lines = File.readlines(phylipfile)
  raise "File #{phylipfile} is empty" if lines.empty?
  header = lines.shift
  @numtaxa, @seqlen = header.split.map { |w| w.to_i }
  @seqs = lines.reject { |l| l =~ /^\s+$/ }
  raise "wrong number of seqs,parsed #{@seqs.size} expected ntaxa #{@numtaxa}" unless @seqs.size == @numtaxa
end
Instance Attribute Details
#numtaxa ⇒ Object (readonly)
Returns the value of attribute numtaxa.
18 19 20 |
# File 'lib/rphylip.rb', line 18
# Reader for the +@numtaxa+ instance variable.
#
# @return [Object] the current value of +@numtaxa+
def numtaxa
  @numtaxa
end
#seqlen ⇒ Object (readonly)
Returns the value of attribute seqlen.
18 19 20 |
# File 'lib/rphylip.rb', line 18
# Reader for the +@seqlen+ instance variable.
#
# @return [Object] the current value of +@seqlen+
def seqlen
  @seqlen
end
#seqs ⇒ Object (readonly)
Returns the value of attribute seqs.
18 19 20 |
# File 'lib/rphylip.rb', line 18
# Reader for the +@seqs+ instance variable.
#
# @return [Object] the current value of +@seqs+ (the same object, not a copy)
def seqs
  @seqs
end
Instance Method Details
#expand_with(phylipfile) ⇒ Object
116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/rphylip.rb', line 116
# Appends every sequence of another alignment file to this alignment and
# increases +@numtaxa+ by the number of sequences added.
#
# @param phylipfile [String] path handed to +Phylip.new+
# @return [Array] the sequences that were appended
# @raise [RuntimeError] if the other alignment's +seqlen+ differs from this
#   one's; nothing is modified in that case
def expand_with(phylipfile)
  additional_phylip = Phylip.new(phylipfile)
  unless additional_phylip.seqlen == @seqlen
    raise "different sequence lengths for new #{phylipfile}, cannot be expanded"
  end

  new_seqs = additional_phylip.seqs
  @seqs.concat(new_seqs)
  @numtaxa += new_seqs.size
  new_seqs
end
#extract_partition(from_pos, to_pos) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/rphylip.rb', line 127
# Cuts every sequence down to the columns +from_pos+..+to_pos+ (1-based,
# inclusive), updates +@seqlen+ accordingly and saves the result next to the
# original file as "<filename>_from<from_pos>_to<to_pos>".
#
# Each entry of +@seqs+ is split on whitespace; the first field is taken as
# the name and the second as the sequence data. The rewritten entries are
# "name data" without a trailing newline.
#
# @param from_pos [Integer] first column to keep (1-based)
# @param to_pos [Integer] last column to keep (1-based)
# @return [Object] whatever +save_as+ returns
# @raise [ArgumentError] if the range is empty or lies outside 1..@seqlen;
#   the alignment is left untouched in that case
def extract_partition(from_pos, to_pos)
  # Validate before mutating anything: an out-of-range request would otherwise
  # write a header whose length disagrees with the sequence data.
  unless from_pos >= 1 && from_pos <= to_pos && to_pos <= @seqlen
    raise ArgumentError, "invalid partition #{from_pos}..#{to_pos} for sequence length #{@seqlen}"
  end

  columns = (from_pos - 1)..(to_pos - 1) # convert to 0-based indices
  @seqs = @seqs.map do |seq|
    name, info = seq.split
    name + " " + info[columns]
  end
  @seqlen = to_pos - from_pos + 1
  save_as(@filename + "_from#{from_pos}_to#{to_pos}")
end
#generate_base_alignment(numseqs_base) ⇒ Object
36 37 38 39 40 41 42 |
# File 'lib/rphylip.rb', line 36
# Writes the base alignment "<filename>_initial" from the first
# +numseqs_base+ entries of +@seqs+, skipping entries whose last
# whitespace-separated field consists of a single repeated character.
#
# @param numseqs_base [Integer] how many leading entries to consider
# @return [Object] whatever +save_seqs_as+ returns
# @raise [RuntimeError] "too few seqs" if 4 or fewer entries survive the filter
def generate_base_alignment(numseqs_base)
  candidates = @seqs[0...numseqs_base]
  seqs = candidates.reject do |entry|
    entry.split.last.chars.to_a.uniq.size <= 1
  end
  basename = @filename + "_initial"
  puts "Saving base alignment as #{basename} with #{seqs.size} seqs"
  raise "too few seqs" if seqs.size <= 4
  save_seqs_as(seqs, basename)
end
#generate_update(seqs, update_filename) ⇒ Object
43 44 45 46 47 48 49 |
# File 'lib/rphylip.rb', line 43
# Writes an update alignment to +update_filename+ from +seqs+, skipping
# entries whose last whitespace-separated field consists of a single repeated
# character.
#
# @param seqs [Array<String>] candidate alignment entries
# @param update_filename [String] destination path
# @return [Object] whatever +save_seqs_as+ returns
# @raise [RuntimeError] "too few seqs" if 4 or fewer entries survive the filter
def generate_update(seqs, update_filename)
  # Drop entries made of one repeated character.
  informative = seqs.reject do |entry|
    entry.split.last.chars.to_a.uniq.size <= 1
  end
  puts "Saving update alignment as #{update_filename} with #{informative.size} seqs"
  raise "too few seqs" if informative.size <= 4
  save_seqs_as(informative, update_filename)
end
#names ⇒ Object
28 29 30 31 32 33 34 35 |
# File 'lib/rphylip.rb', line 28
# Collects the first whitespace-separated field of every entry in +@seqs+.
#
# @return [Array<String>] one name per entry, in alignment order
# @raise [RuntimeError] if the number of names differs from +@numtaxa+
def names
  ali_names = @seqs.map { |entry| entry.split.first.strip }
  raise "unexpected #names" if ali_names.size != @numtaxa
  ali_names
end
#remove_taxa(taxa, pruned_phylip) ⇒ Object
98 99 100 101 102 103 104 105 106 |
# File 'lib/rphylip.rb', line 98
# Removes every entry whose name (first whitespace-separated field) is listed
# in +taxa+, keeps +@numtaxa+ in sync with the remaining entries, and saves
# the pruned alignment to +pruned_phylip+.
#
# @param taxa [Array<String>] names of the taxa to remove
# @param pruned_phylip [String] destination path for the pruned alignment
# @return [nil]
# @raise [RuntimeError] if +taxa+ is nil/false or empty
def remove_taxa(taxa, pruned_phylip)
  # Guard first: the size report below calls taxa.size and would otherwise
  # fail with NoMethodError on nil before this error could be raised.
  raise "empty list of taxa to prune" if !taxa || taxa.empty?
  puts "Original size #{@seqs.size}, after removal expect #{@seqs.size - taxa.size}"
  @seqs.delete_if { |l| taxa.include?(l.split.first.strip) }
  # Keep the taxa count consistent so later calls to #names do not raise.
  @numtaxa = @seqs.size
  save_as(pruned_phylip)
  puts "Final size #{@seqs.size} saved in #{pruned_phylip}"
end
#save_as(newfile) ⇒ Object
107 108 109 |
# File 'lib/rphylip.rb', line 107
# Saves the current contents of +@seqs+ to +newfile+ by delegating to
# +save_seqs_as+.
#
# @param newfile [String] destination path
# @return [Object] whatever +save_seqs_as+ returns
def save_as(newfile)
  save_seqs_as(@seqs, newfile)
end
#save_seqs_as(seqs, newfile) ⇒ Object
110 111 112 113 114 115 |
# File 'lib/rphylip.rb', line 110
# Writes +seqs+ to +newfile+, overwriting it. The first line is the header
# "<number of entries> <@seqlen>"; each entry follows on its own line
# (+puts+ adds a newline only to entries that do not already end with one).
#
# @param seqs [Array<String>] alignment entries to write
# @param newfile [String] destination path
# @return [Array<String>] +seqs+
def save_seqs_as(seqs, newfile)
  header = "#{seqs.size} #{@seqlen}"
  File.open(newfile, "w") do |out|
    out.puts header
    seqs.each do |entry|
      out.puts entry
    end
  end
end
#subdivide(numseqs_base, num_subalignments) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/rphylip.rb', line 50
# Subdivides the present alignment into a base alignment (written by
# +generate_base_alignment+) and +num_subalignments+ update files named
# "<filename>_sequpdate_<i>.phy" (written by +generate_update+).
#
# The entries left after the base are cut into equally sized subsets; any
# remainder is appended to the last subset.
#
# @param numseqs_base [Integer, #to_i] number of entries for the base alignment
# @param num_subalignments [Integer, #to_i] number of update files to produce
# @return [Integer] number of update subsets generated
# @raise [RuntimeError] if +num_subalignments+ is less than 1 or the two
#   counts together exceed +@numtaxa+
def subdivide(numseqs_base, num_subalignments)
  # Normalise once so the guard and the slicing below agree on the values
  # (the guard alone used to call to_i, so string input failed further down).
  numseqs_base = numseqs_base.to_i
  num_subalignments = num_subalignments.to_i
  # A subalignment count below 1 would divide by zero after files were written.
  if num_subalignments < 1 || numseqs_base + num_subalignments > @numtaxa
    raise "Wrong input to subdivide phylip file"
  end

  fisher_yates_shuffle(@seqs)
  generate_base_alignment(numseqs_base)

  # Generate pseudo-new subsets of sequences from everything after the base.
  rest = @seqs[numseqs_base...@seqs.size]
  subset_size = rest.size / num_subalignments
  update_sequences = []
  rest.each_slice(subset_size) do |subset|
    # The last slice is shorter when rest.size is not a multiple of
    # num_subalignments; it is merged into the final subset.
    if update_sequences.size < num_subalignments
      update_sequences << subset
    else
      update_sequences[num_subalignments - 1] += subset
    end
  end

  update_sequences.each_with_index do |seqs, i|
    generate_update(seqs, @filename + "_sequpdate_#{i}.phy")
  end
  update_sequences.size
end
#subdivide_random(conf) ⇒ Object
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/rphylip.rb', line 76
# Splits the alignment into a base alignment of +conf[:initial_seqs]+ entries
# plus a series of update files "<filename>_sequpdate_<i>.phy" whose sizes are
# drawn from +pseudonormal(conf[:min_size_update], conf[:max_size_update])+.
#
# When taking the drawn size would leave fewer than +conf[:min_size_update]+
# entries, the update takes all remaining entries instead. If
# +conf[:updates_as_full_alignments]+ is truthy each update starts at the
# first entry of the alignment; otherwise it holds only the new entries.
#
# @param conf [Hash] reads :initial_seqs, :min_size_update, :max_size_update
#   and :updates_as_full_alignments
# @return [Integer] number of update files generated
# @raise [RuntimeError] "too many seqs" if :initial_seqs exceeds +@numtaxa+
def subdivide_random(conf)
  numseqs_base = conf[:initial_seqs]
  raise "too many seqs" if numseqs_base > @numtaxa

  fisher_yates_shuffle(@seqs)
  generate_base_alignment(numseqs_base)

  consumed = numseqs_base
  iteration = 0
  # Fixed seed so the sequence of update sizes is reproducible.
  # NOTE(review): the seed is set after the shuffle above; whether the shuffle
  # itself is deterministic depends on fisher_yates_shuffle — confirm.
  srand(12345)
  while consumed < @seqs.size
    batch = pseudonormal(conf[:min_size_update], conf[:max_size_update])
    remaining = @seqs.size - consumed
    batch = remaining if remaining - batch < conf[:min_size_update]
    first = conf[:updates_as_full_alignments] ? 0 : consumed
    generate_update(@seqs[first...consumed + batch], @filename + "_sequpdate_#{iteration}.phy")
    consumed += batch
    iteration += 1
  end
  iteration
end