Module: Bio::GFFbrowser::Digest::Parser
- Includes:
- Helpers, Helpers::Logger, Helpers::Validate, Gff3Component, Gff3Features
- Defined in:
- lib/bio/db/gff/digest/gffparser.rb
Overview
Both the in-memory and the no-cache full-digest parsers share this Parser module.
Instance Method Summary collapse
-
#each_CDS ⇒ Object
Yield the id, recs, and containing component.
-
#each_CDS_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_exon ⇒ Object
Yield the id, recs, and containing component.
-
#each_exon_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_gene ⇒ Object
Yield the id, recs, and containing component of genes.
-
#each_gene_seq ⇒ Object
Yield a unique description and the sequence.
-
#each_mRNA ⇒ Object
Yield the id, recs, and containing component of mRNAs.
-
#each_mRNA_seq ⇒ Object
Yield a unique description and the sequence.
- #read_fasta ⇒ Object
- #show_unrecognized_features ⇒ Object
-
#store_record(rec) ⇒ Object
Takes a parsed record rec and stores items in the relevant lists/tables.
Methods included from Helpers::Logger
#debug, #error, #info, #log_sys_info, #warn
Methods included from Helpers::Validate
#validate_cdss, #validate_mrnas
Instance Method Details
#each_CDS ⇒ Object
Yield the id, recs, and containing component
90 91 92 93 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 90
# Walk the CDS list and hand every entry to the caller's block as
# (id, recs, component). +parse+ is invoked first when @cdslist has not
# been set yet.
def each_CDS
  parse unless @cdslist
  each_item(@cdslist) do |cds_id, cds_recs, container|
    yield cds_id, cds_recs, container
  end
end
#each_CDS_seq ⇒ Object
Yield a unique description and the sequence
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 132
# For each CDS that has a containing component, look up the component's
# sequence in @sequencelist, assemble the CDS with the :codonize option
# enabled, and yield a description plus the assembled sequence.
#
# A CDS whose assembled size is not a multiple of 3 is reported through
# +warn+ but is still yielded. A CDS whose component has no sequence
# entry is reported through +warn+ and skipped.
def each_CDS_seq
  each_CDS do |cds_id, cds_recs, container|
    next unless container

    # p cds_id, cds_recs, container
    source_seq = @sequencelist[container.seqname]
    # p source_seq
    next warn("No sequence information for", cds_id) unless source_seq

    cds_seq = assemble(
      source_seq,
      container.start,
      cds_recs,
      @options.merge(:codonize => true)
    )
    unless (cds_seq.size % 3).zero?
      p cds_recs # leave this in
      # raise "CDS size #{cds_seq.size} is not a multiple of 3! <#{cds_seq}>"
      warn("CDS size is not a multiple of 3", cds_id)
    end
    yield description(cds_id, container, cds_recs), cds_seq
  end
end
#each_exon ⇒ Object
Yield the id, recs, and containing component
96 97 98 99 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 96
# Walk the exon list and hand every entry to the caller's block as
# (id, recs, component). +parse+ is invoked first when @exonlist has not
# been set yet.
def each_exon
  parse unless @exonlist
  each_item(@exonlist) do |exon_id, exon_recs, container|
    yield exon_id, exon_recs, container
  end
end
#each_exon_seq ⇒ Object
Yield a unique description and the sequence
154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 154
# For each exon that has a containing component, look up the component's
# sequence in @sequencelist, assemble the exon sequence, and yield a
# description plus that sequence. Exons whose component has no sequence
# entry are reported through +warn+ and skipped.
def each_exon_seq
  each_exon do |exon_id, exon_recs, container|
    next unless container

    source_seq = @sequencelist[container.seqname]
    next warn("No sequence information for", exon_id) unless source_seq

    exon_seq = assemble(source_seq, container.start, exon_recs)
    yield description(exon_id, container, exon_recs), exon_seq
  end
end
#each_gene ⇒ Object
Yield the id, recs, and containing component of genes
78 79 80 81 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 78
# Walk the gene (ORF) list and hand every entry to the caller's block as
# (id, recs, component). +parse+ is invoked first when @orflist has not
# been set yet.
def each_gene
  parse unless @orflist
  each_item(@orflist) do |gene_id, gene_recs, container|
    yield gene_id, gene_recs, container
  end
end
#each_gene_seq ⇒ Object
Yield a unique description and the sequence
102 103 104 105 106 107 108 109 110 111 112 113 114 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 102
# For each gene that has a containing component, look up the component's
# sequence in @sequencelist and yield a description plus the assembled
# gene sequence. Genes whose component has no sequence entry are reported
# through +warn+ and skipped.
def each_gene_seq
  each_gene do |gene_id, gene_recs, container|
    next unless container

    source_seq = @sequencelist[container.seqname]
    # p source_seq
    next warn("No sequence information for", gene_id) unless source_seq

    # The description is built before the sequence is assembled.
    label = description(gene_id, container, gene_recs)
    yield label, assemble(source_seq, container.start, gene_recs)
  end
end
#each_mRNA ⇒ Object
Yield the id, recs, and containing component of mRNAs
84 85 86 87 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 84
# Walk the mRNA list and hand every entry to the caller's block as
# (id, recs, component). +parse+ is invoked first when @mrnalist has not
# been set yet.
def each_mRNA
  parse unless @mrnalist
  each_item(@mrnalist) do |mrna_id, mrna_recs, container|
    yield mrna_id, mrna_recs, container
  end
end
#each_mRNA_seq ⇒ Object
Yield a unique description and the sequence
117 118 119 120 121 122 123 124 125 126 127 128 129 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 117
# For each mRNA that has a containing component, look up the component's
# sequence in @sequencelist and yield a description plus the assembled
# mRNA sequence. mRNAs whose component has no sequence entry are reported
# through +warn+ and skipped.
def each_mRNA_seq
  each_mRNA do |mrna_id, mrna_recs, container|
    next unless container

    source_seq = @sequencelist[container.seqname]
    # p source_seq
    next warn("No sequence information for", mrna_id) unless source_seq

    # The description is built before the sequence is assembled.
    label = description(mrna_id, container, mrna_recs)
    yield label, assemble(source_seq, container.start, mrna_recs)
  end
end
#read_fasta ⇒ Object
64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 64
# When the :fasta_filename option is set, open that file, iterate over it
# with Bio::GFF::FastaReader and store every (id, record) pair in
# @sequencelist. Does nothing when the option is absent.
def read_fasta
  fasta_path = @options[:fasta_filename]
  return unless fasta_path

  File.open(fasta_path) do |io|
    Bio::GFF::FastaReader.new(io).each do |seq_id, fasta_rec|
      # p fasta_rec
      @sequencelist[seq_id] = fasta_rec
    end
  end
  # p :inmemory, @sequencelist
end
#show_unrecognized_features ⇒ Object
58 59 60 61 62 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 58
# Emit one warning for every non-nil key recorded in
# @unrecognized_features.
def show_unrecognized_features
  feature_names = @unrecognized_features.keys
  feature_names.each do |feature_name|
    next unless feature_name

    warn("Unknown feature is ignored", feature_name)
  end
end
#store_record(rec) ⇒ Object
Takes a parsed record rec
and stores items in the relevant lists/tables
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/bio/db/gff/digest/gffparser.rb', line 28
# Takes a parsed record +rec+ and stores it in the relevant lists/tables.
#
# - Records flagged as comments are skipped entirely.
# - The formatted ID and the sequence name are counted in @count_ids and
#   @count_seqnames.
# - Records whose upcased feature type is listed in COMPONENT_TYPES are
#   registered in @componentlist under the formatted ID.
# - gene / mRNA / CDS / exon records, given either by name or by their
#   Sequence Ontology accession, are added to the matching list.
# - Any other feature type that is neither a component nor listed in
#   IGNORE_FEATURES is remembered in @unrecognized_features.
def store_record(rec)
  return if rec.comment # skip GFF comments

  id = Helpers::Record.formatID(rec)
  @count_ids.add(id)
  @count_seqnames.add(rec.seqname)

  is_component = COMPONENT_TYPES.include?(rec.feature_type.upcase)
  if is_component
    # check for container ID
    if rec.id.nil?
      warn("Container <#{rec.feature_type}> has no ID, so using sequence name instead", id)
    end
    @componentlist[id] = rec
    info "Added feature <#{rec.feature_type}> with component ID", id
    # $stderr.print rec
  end

  # Alternatives in a +when+ clause must be comma-separated. The previous
  # form, `when 'gene' || 'SO:0000704'`, evaluated to `when 'gene'` because
  # `||` returns its first truthy operand, so the SO accessions were never
  # matched.
  case rec.feature_type
  when 'gene', 'SO:0000704'
    @orflist.add(id, rec)
  when 'mRNA', 'SO:0000234'
    @mrnalist.add(id, rec)
  when 'CDS', 'SO:0000316'
    @cdslist.add(id, rec)
  when 'exon', 'SO:0000147'
    @exonlist.add(id, rec)
  else
    if !is_component && !IGNORE_FEATURES.include?(rec.feature_type)
      @unrecognized_features[rec.feature_type] = true
    end
  end
end