Class: Bio::GFFbrowser::Block::GffBlockParser
- Inherits:
-
Object
- Object
- Bio::GFFbrowser::Block::GffBlockParser
show all
- Includes:
- FastLineParser
- Defined in:
- lib/bio/db/gff/block/gffblockparser.rb
Overview
The block parser simplifies parsing by assuming the GFF3 file is organised into blocks. All relevant information is resolved one block at a time.
Instance Method Summary
collapse
#parse_attributes_fast, #parse_line_fast
#debug, #error, #info, #log_sys_info, #warn
Constructor Details
#initialize(filename, options) ⇒ GffBlockParser
Returns a new instance of GffBlockParser.
13
14
15
16
17
18
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 13
# Set up a block parser for one GFF3 file.
#
# Logs a start-up message through +info+, stores both arguments and
# creates the Bio::GFF::GFF3::FileIterator used for all later reads.
#
# @param filename name of the GFF3 file, handed to the file iterator
# @param options  option collection; #parse looks up +:fasta_filename+ in it
def initialize(filename, options)
  info 'Starting block parser'
  @filename, @options = filename, options
  @iter = Bio::GFF::GFF3::FileIterator.new(@filename)
end
|
Instance Method Details
#each_CDS_seq ⇒ Object
86
87
88
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 86
# Yield the id and sequence of every feature of type 'cds'.
#
# Delegates to #each_seq and passes each id/sequence pair on unchanged.
#
# @yield [id, seq] one pair per matching feature
def each_CDS_seq
  each_seq('cds') do |feature_id, feature_seq|
    yield feature_id, feature_seq
  end
end
|
#each_exon_seq ⇒ Object
82
83
84
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 82
# Yield the id and sequence of every feature of type 'exon'.
#
# Delegates to #each_seq and passes each id/sequence pair on unchanged.
#
# @yield [id, seq] one pair per matching feature
def each_exon_seq
  each_seq('exon') do |feature_id, feature_seq|
    yield feature_id, feature_seq
  end
end
|
#each_gene_seq ⇒ Object
74
75
76
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 74
# Yield the id and sequence of every feature of type 'gene'.
#
# Delegates to #each_seq and passes each id/sequence pair on unchanged.
#
# @yield [id, seq] one pair per matching feature
def each_gene_seq
  each_seq('gene') do |feature_id, feature_seq|
    yield feature_id, feature_seq
  end
end
|
#each_mRNA_seq ⇒ Object
78
79
80
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 78
# Yield the id and sequence of every feature of type 'mrna'.
#
# Delegates to #each_seq and passes each id/sequence pair on unchanged.
# The type is given in lowercase because #parse_block compares it with
# the downcased feature type of each record.
#
# @yield [id, seq] one pair per matching feature
def each_mRNA_seq
  each_seq('mrna') do |feature_id, feature_seq|
    yield feature_id, feature_seq
  end
end
|
#each_seq(gfftype) ⇒ Object
70
71
72
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 70
# Yield the id and sequence of every feature of the requested type.
#
# Thin wrapper around #parse; every pair produced there is passed on
# unchanged to the caller's block.
#
# @param gfftype feature type to select
# @yield [id, seq] one pair per matching feature
def each_seq(gfftype)
  parse(gfftype) do |feature_id, feature_seq|
    yield feature_id, feature_seq
  end
end
|
#parse(gfftype) ⇒ Object
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 20
# Walk the GFF3 records block by block and yield every feature of type
# +gfftype+ together with its sequence.
#
# Sequences are loaded first: from the file named by
# +@options[:fasta_filename]+ when that option is set, otherwise from the
# file iterator's own +each_sequence+. Records are then collected while
# their seqid stays the same; when the seqid changes the collected block
# is handed to #parse_block with the sequence stored under that seqid.
#
# @param gfftype feature type to select (compared inside #parse_block)
# @yield [id, seq] one pair per matching feature
# @raise [RuntimeError] when a seqid shows up again after a block with a
#   different seqid has started, i.e. the records are not grouped by seqid
def parse(gfftype)
  @inseqidlist = {}
  @sequencelist = {}

  fasta_path = @options[:fasta_filename]
  if fasta_path
    File.open(fasta_path) do |fh|
      reader = Bio::GFF::FastaReader.new(fh)
      reader.each { |id, fastarec| @sequencelist[id] = fastarec }
    end
  else
    @iter.each_sequence { |id, bioseq| @sequencelist[id] = bioseq.to_s }
  end

  current_seqid = nil
  block_recs = []
  @iter.each_rec do |_fpos, line|
    rec = FastLineRecord.new(parse_line_fast(line))
    if current_seqid != rec.seqid
      # A seqid that was already seen means the blocks are interleaved.
      # NOTE(review): the log text mentions a fallback, but this method
      # raises right after logging - confirm where the fallback happens.
      if @inseqidlist[rec.seqid]
        error "GFF3 file not sorted, falling back to line parser"
        raise "ERROR, bailing out"
      end
      # Flush the finished block (nothing to flush before the first record).
      if current_seqid
        parse_block(gfftype, block_recs, @sequencelist[current_seqid]) do |id, seq|
          yield id, seq
        end
      end
      block_recs = []
      current_seqid = rec.seqid
      @inseqidlist[current_seqid] = true
    end
    block_recs << rec
  end

  # Flush the last block; without any record there is nothing to do.
  return unless current_seqid

  parse_block(gfftype, block_recs, @sequencelist[current_seqid]) do |id, seq|
    yield id, seq
  end
end
|
#parse_block(gfftype, recs, sequence) ⇒ Object
Parse sequence objects sharing the same seqid and yield each gfftype
as an id,seq pair
62
63
64
65
66
67
68
|
# File 'lib/bio/db/gff/block/gffblockparser.rb', line 62
# Yield id and subsequence for every record in +recs+ whose feature type
# equals +gfftype+, ignoring case on both sides.
#
# The requested type is normalised once up front, so 'mRNA', 'MRNA',
# 'mrna' and :mrna all select the same records. Previously only the
# record's type was downcased and any non-lowercase +gfftype+ silently
# matched nothing.
#
# @param gfftype  feature type to select (String or Symbol, any case)
# @param recs     records responding to +feature_type+, +id+, +start+, +end+
# @param sequence sequence the records refer to; sliced with the 1-based,
#   inclusive start/end of each matching record
# @yield [id, seq] record id and the matching slice of +sequence+
# @return +recs+, as returned by +each+
def parse_block(gfftype, recs, sequence)
  wanted = gfftype.to_s.downcase
  recs.each do |rec|
    next unless rec.feature_type.downcase == wanted

    # Record coordinates are 1-based inclusive; string indices are 0-based.
    yield rec.id, sequence[(rec.start - 1)..(rec.end - 1)]
  end
end
|