Class: Fasta

Inherits:
Object
  • Object
show all
Extended by:
GermDefault
Includes:
FastaAux
Defined in:
lib/fasta.rb

Defined Under Namespace

Classes: Chrom

Constant Summary

Constants included from GermDefault

GermDefault::CACHE

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from GermDefault

cache, cache_load, default, has_default, method_missing

Methods included from FastaAux

#get_nmer_freq, #get_seq_chunk, #get_seq_starts

Constructor Details

#initialize(file, size = nil, cen_file = nil) ⇒ Fasta

Returns a new instance of Fasta.



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/fasta.rb', line 112

# Opens a FASTA file and builds the in-memory index for it.
#
# @param file [String] path to the FASTA file; the open handle is kept in @io
# @param size [Integer, nil] length of a sequence line; when nil (or false)
#   it is obtained from Fasta.guess_line_size
# @param cen_file [String, nil] optional file passed to #load_centromeres
def initialize(file, size = nil, cen_file = nil)
  @io = File.open(file)
  @line_size = size || Fasta.guess_line_size(file)

  # NOTE(review): the three indexing steps are kept in their original order;
  # presumably each one relies on state set up by the previous one.
  get_seq_starts
  compute_chrom_stats
  shorten_seq_names

  load_centromeres(cen_file) if cen_file
end

Instance Attribute Details

#chroms ⇒ Object (readonly)

Returns the value of attribute chroms.



111
112
113
# File 'lib/fasta.rb', line 111

# Reader for the chroms attribute.
#
# @return [Object] the current value of @chroms
def chroms
  @chroms
end

#line_size ⇒ Object (readonly)

Returns the value of attribute line_size.



111
112
113
# File 'lib/fasta.rb', line 111

# Reader for the line_size attribute.
#
# @return [Object] the current value of @line_size
def line_size
  @line_size
end

#seq_names ⇒ Object (readonly)

Returns the value of attribute seq_names.



111
112
113
# File 'lib/fasta.rb', line 111

# Reader for the seq_names attribute.
#
# @return [Object] the current value of @seq_names
def seq_names
  @seq_names
end

Class Method Details

.guess_line_size(file) ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/fasta.rb', line 12

# Guesses the sequence line width of a FASTA file from its first 50 lines.
#
# Every header line (one starting with ">") found in that window opens a
# record. For each record the first two lines after the header are compared;
# the first record in which both have the same non-zero length decides the
# result.
#
# @param file [String] path to the FASTA file
# @return [Integer] length of a sequence line, line terminator excluded
# @raise [RuntimeError] when no record in the window allows a guess
def guess_line_size(file)
  # Block form so the handle is closed on every exit path (return or raise).
  lines = File.open(file) { |io| io.each_line.first(50) }

  headers = lines.each_index.select { |i| lines[i] =~ /^>/ }

  headers.each_with_index do |header, i|
    # Lines between this header and the next one (or the end of the window).
    record = lines[(header + 1)...(headers[i + 1] || lines.size)]
    # you need at least 2 lines to be sure.
    next if record.size < 2

    # Compare without terminators so a final line lacking "\n" still matches.
    first_len = record[0].chomp.size
    second_len = record[1].chomp.size
    # Two blank lines would otherwise "agree" on a width of zero.
    next if first_len.zero?

    return first_len if first_len == second_len
  end

  # You can't guess, raise an exception.
  raise "Could not guess file line size, please specify"
end

Instance Method Details

#chrom(name) ⇒ Object



128
129
130
# File 'lib/fasta.rb', line 128

# Looks up a chromosome entry by name.
#
# @param name [Object] key into @chrom_alias
# @return [Object, nil] the entry stored in @chrom_alias under +name+
def chrom(name)
  @chrom_alias[name]
end

#genome_size ⇒ Object



132
133
134
# File 'lib/fasta.rb', line 132

# Total size of all sequences, i.e. the sum of chrom(name).size over
# @seq_names. The result is memoized in @genome_size.
#
# @return [Integer] summed size of every named sequence
def genome_size
  @genome_size ||= @seq_names.map { |seqname| chrom(seqname).size }.inject(0, :+)
end

#get_masked_seq(seqname, start, stop) ⇒ Object

Raises:

  • (ArgumentError)


164
165
166
167
168
# File 'lib/fasta.rb', line 164

# Fetches the sequence for an interval exactly as stored in the file, with
# newlines removed and letter case left untouched.
#
# @param seqname [Object] name understood by #chrom
# @param start [Object] interval start, converted with the chromosome's file_pos
# @param stop [Object] interval stop, converted with the chromosome's file_pos
# @return [String] the chunk returned by get_seq_chunk, stripped of "\n"
# @raise [ArgumentError] when #interval_missing? reports the region as missing
def get_masked_seq(seqname, start, stop)
  region = GenomicLocus::Region.new(seqname, start, stop)
  if interval_missing?(region)
    raise ArgumentError, "Improper interval #{seqname}:#{start}-#{stop}"
  end

  target = chrom(seqname)
  chunk = get_seq_chunk(target.file_pos(start), target.file_pos(stop))
  chunk.gsub(/\n/,'')
end

#get_seq(seqname, start, stop) ⇒ Object



155
156
157
158
# File 'lib/fasta.rb', line 155

# Same as #get_masked_seq, but with the result upper-cased.
#
# @param seqname [Object] forwarded to #get_masked_seq
# @param start [Object] forwarded to #get_masked_seq
# @param stop [Object] forwarded to #get_masked_seq
# @return [String, nil] upper-cased sequence; a falsy result from
#   #get_masked_seq is passed through unchanged
def get_seq(seqname, start, stop)
  masked = get_masked_seq(seqname, start, stop)
  masked ? masked.upcase : masked
end

#inspect ⇒ Object



146
147
148
# File 'lib/fasta.rb', line 146

# Compact representation: class name, object id and the number of entries
# in @seq_names (instead of dumping every instance variable).
#
# @return [String]
def inspect
  seq_total = @seq_names.count
  "#<#{self.class.name}:#{object_id} @chroms=#{seq_total}>"
end

#interval_missing?(locus) ⇒ Boolean

Returns:

  • (Boolean)


160
161
162
# File 'lib/fasta.rb', line 160

# Tells whether a locus cannot be served from this file.
#
# @param locus [#seqname] locus to check; it is also handed to the
#   chromosome's contains?
# @return [Boolean] true when #chrom finds nothing for locus.seqname or the
#   chromosome found does not contain the locus
def interval_missing?(locus)
  target = chrom(locus.seqname)
  !(target && target.contains?(locus))
end

#load_centromeres(file) ⇒ Object



170
171
172
173
174
175
176
# File 'lib/fasta.rb', line 170

# Reads centromere positions from a tab-separated file and stores each one
# on the matching chromosome via set_cen_pos.
#
# Column 1 of every non-blank line is the sequence name, column 2 the
# position (converted with to_i); any further columns are ignored.
#
# @param file [String] path to the tab-separated file
# @return [nil]
def load_centromeres(file)
  File.foreach(file) do |line|
    # A blank line (typically a trailing newline at EOF) carries no record;
    # without this guard it would be looked up as the sequence name "\n".
    next if line.strip.empty?

    seqname, cen_pos = line.split(/\t/)[0..1]
    # NOTE(review): a name #chrom does not know presumably yields nil here
    # and fails with NoMethodError — confirm whether that is intended.
    chrom(seqname).set_cen_pos(cen_pos.to_i)
  end
end

#locus_seq(locus) ⇒ Object

Raises:

  • (TypeError)


150
151
152
153
# File 'lib/fasta.rb', line 150

# Fetches the upper-cased sequence covered by a locus object.
#
# @param locus [GenomicLocus] must respond to short_chrom, start and stop
# @return [Object] whatever #get_seq returns for that interval
# @raise [TypeError] when +locus+ is not a GenomicLocus
def locus_seq(locus)
  unless locus.is_a?(GenomicLocus)
    raise TypeError, "not a GenomicLocus!"
  end

  get_seq(locus.short_chrom, locus.start, locus.stop)
end

#random_chrom ⇒ Object

find a random chromosome, weighted by size



137
138
139
# File 'lib/fasta.rb', line 137

# Draws a random 1-based offset in 1..genome_size and passes it to
# position_from_total with the second argument set to true.
#
# NOTE(review): position_from_total is not visible here; presumably the
# flag makes it return the chromosome holding that offset — confirm.
#
# @return [Object] the result of position_from_total
def random_chrom
  offset = rand(genome_size) + 1
  position_from_total(offset, true)
end

#random_pos ⇒ Object

pick a random base in the genome



142
143
144
# File 'lib/fasta.rb', line 142

# Draws a random 1-based offset in 1..genome_size and passes it, as the
# only argument, to position_from_total.
#
# @return [Object] the result of position_from_total
def random_pos
  offset = rand(genome_size) + 1
  position_from_total(offset)
end