Class: GFF3
- Inherits:
-
Object
- Object
- GFF3
- Defined in:
- lib/bio-pangenome/gff3_extensions.rb
Defined Under Namespace
Classes: CDS_feature
Instance Method Summary
- #bedAroundGene(distance: 1000, out: $stdout) ⇒ Object
- #calculate_mrna_stats ⇒ Object
- #cds_to_print(mrna, cannonical_exons: [], colors: ["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"]) ⇒ Object
- #each ⇒ Object
- #each_cds ⇒ Object
- #each_gene ⇒ Object
- #each_mrna ⇒ Object
-
#initialize(file: "", is_gz: true) ⇒ GFF3
constructor
A new instance of GFF3.
- #mrna_info(id) ⇒ Object
Constructor Details
#initialize(file: "", is_gz: true) ⇒ GFF3
Returns a new instance of GFF3.
12 13 14 15 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 12
# Creates a GFF3 reader. Nothing is opened here; both arguments are only
# stored for later use by the iteration methods.
#
# @param file [String] path of the GFF3 file to read
# @param is_gz [Boolean] whether the file should be read as gzip-compressed
def initialize(file: "", is_gz: true)
  @file, @is_gz = file, is_gz
end
Instance Method Details
#bedAroundGene(distance: 1000, out: $stdout) ⇒ Object
90 91 92 93 94 95 96 97 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 90
# Writes one tab-separated, BED-like line per gene, covering the gene plus
# +distance+ bases on each side.
#
# Columns written: seqid, start, end, name, ".", strand. The name is
# "<id>_<source>_<distance>bp". The start is never allowed below 1; the end
# is not clamped.
#
# @param distance [Integer] number of bases added on both sides of each gene
# @param out [#puts] destination for the lines (defaults to $stdout)
def bedAroundGene(distance: 1000, out: $stdout)
  each_gene do |gene|
    left = [gene.start - distance, 1].max
    right = gene.end + distance
    label = "#{gene.id}_#{gene.source}_#{distance}bp"
    out.puts [gene.seqid, left, right, label, ".", gene.strand].join("\t")
  end
end
#calculate_mrna_stats ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 66
# Scans every CDS record once and builds the per-mRNA statistics table kept
# in @mrna_stats, keyed by the CDS "Parent" attribute.
#
# For each parent it counts the CDS records (cds_count) and tracks the
# largest value of `start - previous end` between consecutive CDS records
# that share the same parent (cds_max_gap). Entries are MrnaStats objects
# (defined elsewhere) created as MrnaStats.new(0, 0).
#
# The table is memoised: later calls return immediately. It is only
# published once the whole scan has succeeded, so an exception raised while
# reading cannot leave a half-filled table behind.
#
# @return [nil]
def calculate_mrna_stats
  return if @mrna_stats

  stats = Hash.new { |table, id| table[id] = MrnaStats.new(0, 0) }
  prev_parent = nil
  prev_record = nil
  each_cds do |record|
    parent = record.get_attribute "Parent"
    entry = stats[parent]
    entry.cds_count += 1
    # A gap only makes sense relative to the preceding CDS of the same mRNA.
    if prev_record && prev_parent == parent
      gap = record.start - prev_record.end
      entry.cds_max_gap = gap if gap > entry.cds_max_gap
    end
    prev_record = record
    prev_parent = parent
  end

  # Publish last, so a failed scan is retried instead of being cached.
  @mrna_stats = stats
  nil
end
#cds_to_print(mrna, cannonical_exons: [], colors: ["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"]) ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 100
# Builds one CDS_feature per CDS record, cycling through +colors+.
#
# Each record's "Target" attribute is split on whitespace; its second and
# third fields are used (as integers) for the feature's start and end. The
# last CDS_feature argument is the record's start relative to the start of
# the first CDS seen.
#
# NOTE(review): +mrna+ and +cannonical_exons+ are never read, so every CDS
# in the file is included regardless of the mRNA passed in — confirm whether
# filtering by parent mRNA was intended.
#
# @param mrna [Object] currently unused
# @param cannonical_exons [Array] currently unused
# @param colors [Array<String>] colour strings assigned round-robin
# @return [Array<CDS_feature>]
def cds_to_print(mrna, cannonical_exons: [], colors: ["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"])
  features = []
  first_cds_start = 0 # 0 means "not set yet"
  each_cds do |cds|
    _, target_from, target_to = cds.get_attribute("Target").split(" ")
    paint = colors[features.size % colors.size]
    first_cds_start = cds.start if first_cds_start == 0
    features << CDS_feature.new(
      target_from.to_i,
      target_to.to_i,
      paint,
      cds.seqid,
      cds.start,
      cds.end,
      cds.start - first_cds_start
    )
  end
  features
end
#each ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 17
# Streams the GFF3 file and yields one parsed record per feature line.
#
# Blank lines and lines starting with "#" are skipped, and reading stops at
# the "##FASTA" directive. When @is_gz is true the file is read through
# Zlib::GzipReader. The underlying handle is always closed when iteration
# finishes, including on early exit or error.
#
# @yield [record] a Bio::GFFbrowser::FastLineRecord built from the line
# @return [Enumerator] when called without a block
# @raise [StandardError] whatever the line parser raised; the offending line
#   is written to $stderr first. Errors raised by the caller's block
#   propagate untouched and are not reported as parse failures.
def each
  return enum_for(:each) unless block_given?

  # File.open rather than Kernel#open: the path must never be treated as a
  # command pipe, and it matches how the non-gzip case was already opened.
  raw = File.open(@file)
  io = nil
  begin
    io = @is_gz ? Zlib::GzipReader.new(raw) : raw
    parser = Bio::GFFbrowser::FastLineParser
    io.each_line do |line|
      line.encode!('UTF-8', 'UTF-8', :invalid => :replace)
      line.strip!
      break if line == '##FASTA'
      next if line.empty? || line.start_with?('#')

      begin
        record = Bio::GFFbrowser::FastLineRecord.new(parser.parse_line_fast(line))
      rescue StandardError => e
        $stderr.puts "Unable to parse '#{line}'\n#{e}"
        raise # re-raise the original error (throw would raise UncaughtThrowError)
      end
      yield record
    end
  ensure
    # Closing the GzipReader also closes the file it wraps; fall back to the
    # raw handle if the gzip header could not be read.
    handle = io || raw
    handle.close unless handle.closed?
  end
end
#each_cds ⇒ Object
58 59 60 61 62 63 64 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 58
# Yields every record whose feature type is "CDS".
#
# @yield [record] each CDS record, in file order
# @return [Enumerator] over the CDS records when called without a block
def each_cds
  # The enumerator must re-enter this method, not each_mrna.
  return enum_for(:each_cds) unless block_given?

  each do |record|
    next unless record.feature == "CDS"

    yield record
  end
end
#each_gene ⇒ Object
42 43 44 45 46 47 48 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 42
# Yields every record whose feature type is "gene".
#
# @yield [record] each gene record, in file order
# @return [Enumerator] over the gene records when called without a block
def each_gene
  return enum_for(:each_gene) unless block_given?

  each { |entry| yield entry if entry.feature == "gene" }
end
#each_mrna ⇒ Object
50 51 52 53 54 55 56 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 50
# Yields every record whose feature type is "mRNA".
#
# @yield [record] each mRNA record, in file order
# @return [Enumerator] over the mRNA records when called without a block
def each_mrna
  return enum_for(:each_mrna) unless block_given?

  each { |entry| yield entry if entry.feature == "mRNA" }
end
#mrna_info(id) ⇒ Object
85 86 87 88 |
# File 'lib/bio-pangenome/gff3_extensions.rb', line 85
# Looks up the CDS statistics collected for one mRNA.
#
# The statistics table is built on first use by calculate_mrna_stats and
# reused afterwards.
#
# @param id [Object] key into the statistics table (the CDS "Parent" value)
# @return [Object] the @mrna_stats entry stored under +id+
def mrna_info(id)
  calculate_mrna_stats if @mrna_stats.nil?
  stats_table = @mrna_stats
  stats_table[id]
end