Class: Bio::GFFbrowser::Digest::LruCache

Inherits:
Object
  • Object
show all
Includes:
LruCacheHelpers, Parser, Gff3Sequence, LruRec
Defined in:
lib/bio/db/gff/digest/gfflrucache.rb

Instance Method Summary collapse

Methods included from Parser

#each_CDS, #each_CDS_seq, #each_exon, #each_exon_seq, #each_gene, #each_gene_seq, #each_mRNA, #each_mRNA_seq, #read_fasta, #show_unrecognized_features, #store_record

Methods included from Helpers::Logger

#debug, #error, #info, #log_sys_info, #warn

Methods included from Helpers::Validate

#validate_cdss, #validate_mrnas

Constructor Details

#initialize(filename, options) ⇒ LruCache

Returns a new instance of LruCache.



143
144
145
146
147
148
# File 'lib/bio/db/gff/digest/gfflrucache.rb', line 143

# Create a cache for the given GFF3 file.
#
# Stores the file name and options, opens a
# Bio::GFF::GFF3::FileIterator on the file and creates a fresh LruTracker.
#
# @param filename passed to Bio::GFF::GFF3::FileIterator.new
# @param options  options object; other methods of this class read
#                 options[:parser] and options[:no_assemble] from it
def initialize(filename, options)
  @filename = filename
  @options  = options
  @iter     = Bio::GFF::GFF3::FileIterator.new(filename)
  @lru      = LruTracker.new
end

Instance Method Details

#each_item(list) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# File 'lib/bio/db/gff/digest/gfflrucache.rb', line 185

# Walk a list of linked records, fetching every record through its stored
# seek position and handing the result to the caller's block.
#
# @param list enumerable whose #each yields (id, io_seeklist) pairs;
#   io_seeklist is the collection of file positions belonging to that id
# @yield [id, recs, component] the id, an Array of fetched records and the
#   value find_component returns for the first record. When
#   @options[:no_assemble] is truthy each record is yielded on its own as a
#   one-element Array; otherwise all records of an id are yielded together.
# @return whatever @lru.display returns
#
# NOTE(review): assumes every seek list holds at least one position; with an
# empty list find_component receives nil - confirm against callers.
def each_item(list)
  fh = @iter.fh
  list.each do |id, io_seeklist|
    recs = io_seeklist.map { |fpos| fetch(fh, fpos, @options[:parser]) }
    # The component is looked up once per id, from the first record.
    component = find_component(recs.first)
    if @options[:no_assemble]
      recs.each { |rec| yield id, [rec], component }
    else
      yield id, recs, component
    end
  end
  @lru.display('After iterating')
end

#parse ⇒ Object

Parse the whole file once and store all seek locations, rather than the records themselves.



152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/bio/db/gff/digest/gfflrucache.rb', line 152

# Read the whole file once, building a record for every line handed out by
# the iterator and passing it to store_record, then collect the sequences,
# run the validations and report on the LRU tracker.
#
# The record class depends on @options[:parser]:
#   :bioruby -> Bio::GFF::GFF3::BioRubyFileRecord
#   :line    -> Bio::GFF::GFF3::FastParserFileRecord
#
# @raise [RuntimeError] 'Unknown parser' when a record is read and
#   @options[:parser] is neither :bioruby nor :line
# @return whatever @lru.display returns
def parse
  info "---- Digest DB and store data in mRNA Hash (LruCache)"

  # Counters for ids and seqnames
  @count_ids      = Counter.new
  @count_seqnames = Counter.new
  # Containers, like genes, contigs
  @componentlist  = SeekRecList.new(@iter.fh, @options[:parser], @lru)
  # Linked gene and mRNA records, followed by the CDS and exon lists
  @orflist        = SeekLinkedRecs.new
  @mrnalist       = SeekLinkedRecs.new
  @cdslist        = SeekLinkedRecs.new
  @exonlist       = SeekLinkedRecs.new
  @sequencelist   = {}
  @unrecognized_features = {}

  @iter.each_rec do |offset, text|
    # The parser option is checked for every record, so an unknown parser
    # only raises once a record has actually been read.
    record_class =
      case @options[:parser]
      when :bioruby then Bio::GFF::GFF3::BioRubyFileRecord
      when :line    then Bio::GFF::GFF3::FastParserFileRecord
      else raise 'Unknown parser'
      end
    store_record(record_class.new(offset, text))
  end

  @iter.each_sequence { |id, bioseq| @sequencelist[id] = bioseq.to_s }

  validate_mrnas
  validate_cdss
  show_unrecognized_features
  @genelist = @count_ids.keys
  read_fasta
  @lru.display('After reading files')
end