Class: Fasta
- Inherits:
-
Object
- Object
- Fasta
- Extended by:
- GermDefault
- Includes:
- FastaAux
- Defined in:
- lib/fasta.rb
Defined Under Namespace
Classes: Chrom
Constant Summary
Constants included from GermDefault
Instance Attribute Summary collapse
-
#chroms ⇒ Object
readonly
Returns the value of attribute chroms.
-
#line_size ⇒ Object
readonly
Returns the value of attribute line_size.
-
#seq_names ⇒ Object
readonly
Returns the value of attribute seq_names.
Class Method Summary collapse
Instance Method Summary collapse
- #chrom(name) ⇒ Object
- #genome_size ⇒ Object
- #get_masked_seq(seqname, start, stop) ⇒ Object
- #get_seq(seqname, start, stop) ⇒ Object
-
#initialize(file, size = nil, cen_file = nil) ⇒ Fasta
constructor
A new instance of Fasta.
- #inspect ⇒ Object
- #interval_missing?(locus) ⇒ Boolean
- #load_centromeres(file) ⇒ Object
- #locus_seq(locus) ⇒ Object
-
#random_chrom ⇒ Object
find a random chromosome, weighted by size.
-
#random_pos ⇒ Object
pick a random base in the genome.
Methods included from GermDefault
cache, cache_load, default, has_default, method_missing
Methods included from FastaAux
#get_nmer_freq, #get_seq_chunk, #get_seq_starts
Constructor Details
#initialize(file, size = nil, cen_file = nil) ⇒ Fasta
Returns a new instance of Fasta.
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/fasta.rb', line 112

# Creates a new Fasta over the file at +file+.
#
# The file is opened and the handle kept in +@io+; it is not closed here.
# The line size is taken from +size+ when given, otherwise it is guessed
# with Fasta.guess_line_size. The helpers +get_seq_starts+,
# +compute_chrom_stats+ and +shorten_seq_names+ are then run in that order
# (their bodies are not visible in this excerpt).
#
# @param file [String] path of the FASTA file to open
# @param size [Integer, nil] line size; guessed from the file when nil
# @param cen_file [String, nil] optional file handed to #load_centromeres
def initialize(file, size = nil, cen_file = nil)
  @io = File.open(file)
  @line_size = size || Fasta.guess_line_size(file)

  get_seq_starts
  compute_chrom_stats
  shorten_seq_names

  # Centromere data is optional.
  load_centromeres(cen_file) if cen_file
end
Instance Attribute Details
#chroms ⇒ Object (readonly)
Returns the value of attribute chroms.
111 112 113 |
# File 'lib/fasta.rb', line 111

# Read-only accessor for the +@chroms+ instance variable.
#
# @return [Object] the value currently stored in +@chroms+
def chroms
  @chroms
end
#line_size ⇒ Object (readonly)
Returns the value of attribute line_size.
111 112 113 |
# File 'lib/fasta.rb', line 111

# Read-only accessor for the +@line_size+ instance variable.
#
# @return [Object] the value currently stored in +@line_size+
def line_size
  @line_size
end
#seq_names ⇒ Object (readonly)
Returns the value of attribute seq_names.
111 112 113 |
# File 'lib/fasta.rb', line 111

# Read-only accessor for the +@seq_names+ instance variable.
#
# @return [Object] the value currently stored in +@seq_names+
def seq_names
  @seq_names
end
Class Method Details
.guess_line_size(file) ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
# File 'lib/fasta.rb', line 12

# Guesses how many sequence characters each line of a FASTA file holds by
# looking at (at most) its first 50 lines.
#
# The sample is split into records: a line starting with +>+ plus the lines
# that follow it, up to the next such line or the end of the sample. The
# first record whose first two lines have the same length decides the
# answer. Lengths are compared without line terminators, so a record that
# ends the file without a trailing newline is still usable.
#
# The file is opened in block form, so the handle is closed before this
# method returns or raises.
#
# @param file [String] path of the FASTA file to inspect
# @return [Integer] length of a sequence line, line terminator excluded
# @raise [RuntimeError] when no sampled record has two equal-length lines
def guess_line_size(file)
  # Grab the first 50 lines; the block form closes the handle for us.
  lines = File.open(file) { |io| io.each_line.first(50) }

  header_rows = lines.each_index.select { |i| lines[i].start_with?('>') }

  header_rows.each_with_index do |row, i|
    record_end = header_rows[i + 1] || lines.size
    record = lines[(row + 1)...record_end]
    # you need at least 2 lines to be sure.
    next if record.size < 2

    first_len = record[0].chomp.size
    return first_len if first_len == record[1].chomp.size
  end

  # You can't guess, raise an exception.
  raise "Could not guess file line size, please specify"
end
Instance Method Details
#chrom(name) ⇒ Object
128 129 130 |
# File 'lib/fasta.rb', line 128

# Looks up +name+ in +@chrom_alias+.
#
# @param name [Object] key to look up (presumably a sequence name or alias
#   -- confirm where +@chrom_alias+ is populated)
# @return [Object, nil] whatever +@chrom_alias+ holds for +name+
def chrom(name)
  @chrom_alias[name]
end
#genome_size ⇒ Object
132 133 134 |
# File 'lib/fasta.rb', line 132

# Sum of +chrom(name).size+ over every entry of +@seq_names+.
#
# The result is memoized in +@genome_size+, so later changes to
# +@seq_names+ are not reflected once a truthy value has been computed.
#
# @return [Integer] the accumulated size
def genome_size
  return @genome_size if @genome_size

  @genome_size = @seq_names.reduce(0) do |total, seq_name|
    total + chrom(seq_name).size
  end
end
#get_masked_seq(seqname, start, stop) ⇒ Object
164 165 166 167 168 |
# File 'lib/fasta.rb', line 164

# Fetches the sequence for +seqname:start-stop+ with its original letter
# case untouched (see #get_seq for the upcased variant).
#
# The interval is first checked with #interval_missing?. The start and
# stop positions are then translated with the chromosome's +file_pos+, the
# chunk between them is read with +get_seq_chunk+, and newlines are
# stripped from the result.
#
# @param seqname [Object] name passed to #chrom
# @param start [Object] start position of the interval
# @param stop [Object] stop position of the interval
# @return [String] the chunk with all newline characters removed
# @raise [ArgumentError] when #interval_missing? reports the interval
def get_masked_seq(seqname, start, stop)
  region = GenomicLocus::Region.new(seqname, start, stop)
  if interval_missing?(region)
    raise ArgumentError, "Improper interval #{seqname}:#{start}-#{stop}"
  end

  from_pos = chrom(seqname).file_pos(start)
  to_pos = chrom(seqname).file_pos(stop)
  get_seq_chunk(from_pos, to_pos).gsub(/\n/,'')
end
#get_seq(seqname, start, stop) ⇒ Object
155 156 157 158 |
# File 'lib/fasta.rb', line 155

# Upper-cased variant of #get_masked_seq.
#
# @param seqname [Object] forwarded to #get_masked_seq
# @param start [Object] forwarded to #get_masked_seq
# @param stop [Object] forwarded to #get_masked_seq
# @return [String, nil, false] the upcased sequence, or the falsy value
#   returned by #get_masked_seq unchanged
def get_seq(seqname, start, stop)
  masked = get_masked_seq(seqname, start, stop)
  # Pass a falsy result straight through instead of calling upcase on it.
  return masked unless masked

  masked.upcase
end
#inspect ⇒ Object
146 147 148 |
# File 'lib/fasta.rb', line 146 def inspect "#<#{self.class.name}:#{object_id} @chroms=#{@seq_names.count}>" end |
#interval_missing?(locus) ⇒ Boolean
160 161 162 |
# File 'lib/fasta.rb', line 160

# Tells whether +locus+ cannot be served: either #chrom has nothing for
# +locus.seqname+, or that chromosome's +contains?+ rejects the locus.
#
# @param locus [#seqname] the interval to check
# @return [Boolean] true when the chromosome is unknown or does not
#   contain the locus
def interval_missing?(locus)
  target = chrom(locus.seqname)
  !(target && target.contains?(locus))
end
#load_centromeres(file) ⇒ Object
170 171 172 173 174 175 176 |
# File 'lib/fasta.rb', line 170

# Reads a tab-separated file and, for every line, calls +set_cen_pos+ on
# the chromosome named in the first column with the integer value of the
# second column. Further columns are ignored.
#
# NOTE(review): a line whose first column is not known to #chrom makes
# +chrom(seqname)+ return nil (as far as the visible lookup shows), which
# would raise NoMethodError here -- confirm whether callers rely on that.
#
# @param file [String] path of the tab-separated centromere file
def load_centromeres(file)
  File.foreach(file) do |row|
    fields = row.split(/\t/)
    seqname = fields[0]
    # to_i turns a missing or non-numeric second column into 0.
    cen_pos = fields[1].to_i
    chrom(seqname).set_cen_pos(cen_pos)
  end
end
#locus_seq(locus) ⇒ Object
150 151 152 153 |
# File 'lib/fasta.rb', line 150

# Convenience wrapper around #get_seq that takes a GenomicLocus and passes
# on its +short_chrom+, +start+ and +stop+.
#
# @param locus [GenomicLocus] the interval to fetch
# @return [String, nil] whatever #get_seq returns for the interval
# @raise [TypeError] when +locus+ is not a GenomicLocus
def locus_seq(locus)
  unless locus.is_a?(GenomicLocus)
    raise TypeError, "not a GenomicLocus!"
  end

  get_seq(locus.short_chrom, locus.start, locus.stop)
end
#random_chrom ⇒ Object
find a random chromosome, weighted by size
137 138 139 |
# File 'lib/fasta.rb', line 137

# Finds a random chromosome, weighted by size.
#
# Draws +1 + rand(genome_size)+ and passes it to +position_from_total+
# together with +true+. The helper is not visible in this excerpt;
# presumably the flag asks for the chromosome rather than a position --
# confirm against its definition.
#
# @return [Object] whatever +position_from_total+ returns
def random_chrom
  draw = rand(genome_size) + 1
  position_from_total(draw, true)
end
#random_pos ⇒ Object
pick a random base in the genome
142 143 144 |
# File 'lib/fasta.rb', line 142

# Picks a random base in the genome.
#
# Draws +1 + rand(genome_size)+ and passes it as the only argument to
# +position_from_total+ (helper not visible in this excerpt).
#
# @return [Object] whatever +position_from_total+ returns
def random_pos
  draw = rand(genome_size) + 1
  position_from_total(draw)
end