Class: GFF3

Inherits:
Object
  • Object
show all
Defined in:
lib/bio-pangenome/gff3_extensions.rb

Defined Under Namespace

Classes: CDS_feature

Instance Method Summary

Constructor Details

#initialize(file: "", is_gz: true) ⇒ GFF3

Returns a new instance of GFF3.



12
13
14
15
# File 'lib/bio-pangenome/gff3_extensions.rb', line 12

# Remembers which file to read and whether it is gzip-compressed.
# Only the two settings are stored; no file is opened here.
#
# @param file [String] location of the GFF3 file, later opened by #each
# @param is_gz [Boolean] true when the file must be read through gzip
def initialize(file: "", is_gz: true)
	@file, @is_gz = file, is_gz
end

Instance Method Details

#bedAroundGene(distance: 1000, out: $stdout) ⇒ Object



90
91
92
93
94
95
96
97
# File 'lib/bio-pangenome/gff3_extensions.rb', line 90

# Writes one tab-separated, six-column line per gene describing the gene
# extended by +distance+ bases on both sides.
#
# Columns: seqid, window start, window end, "<id>_<source>_<distance>bp",
# a literal ".", and the strand.
#
# NOTE(review): the window start is clamped to 1, never 0. If consumers expect
# 0-based BED starts this may be off by one - confirm against downstream use.
#
# @param distance [Integer] number of bases added on each side of the gene
# @param out [#puts] destination for the generated lines
def bedAroundGene(distance:1000, out:$stdout)
	each_gene do |gene|
		left = gene.start - distance
		# The window never starts before position 1; the end is not clamped.
		left = left < 1 ? 1 : left
		right = gene.end + distance
		label = "#{gene.id}_#{gene.source}_#{distance}bp"
		out.puts [gene.seqid, left, right, label, ".", gene.strand].join("\t")
	end
end

#calculate_mrna_stats ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/bio-pangenome/gff3_extensions.rb', line 66

# Builds @mrna_stats, a hash keyed by the "Parent" attribute of each CDS record.
# Each value is a MrnaStats holding how many CDS records share that parent and
# the largest start-to-previous-end distance seen between two consecutive CDS
# records of the same parent.
#
# The table is built only once: later calls return immediately. Only CDS
# records that directly follow one another with the same parent contribute to
# the gap. The maximum starts at 0, so negative distances are never recorded.
#
# @return [nil]
def calculate_mrna_stats
	return if @mrna_stats
	@mrna_stats = Hash.new {|h,k| h[k] = MrnaStats.new(0,0) }
	previous_cds = nil
	previous_parent = ""
	each_cds do |cds|
		parent_id = cds.get_attribute "Parent"
		stats = @mrna_stats[parent_id]
		stats.cds_count += 1
		if previous_parent == parent_id
			gap = cds.start - previous_cds.end
			stats.cds_max_gap = gap if gap > stats.cds_max_gap
		end
		previous_cds, previous_parent = cds, parent_id
	end
	nil
end

#cds_to_print(mrna, cannonical_exons: [], colors: ["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99", "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"]) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/bio-pangenome/gff3_extensions.rb', line 100

# Turns every CDS record into a CDS_feature ready for drawing.
#
# For each record the "Target" attribute is split on spaces; its second and
# third fields (converted with to_i) become the feature's start and end. The
# colour cycles through +colors+ in record order. The last field of the
# feature is the record's start minus the start of the first CDS seen.
#
# NOTE(review): +mrna+ and +cannonical_exons+ are accepted but never read, so
# every CDS yielded by each_cds is included - confirm that is intended.
#
# @param mrna [Object] currently unused
# @param cannonical_exons [Array] currently unused
# @param colors [Array<String>] palette cycled through, one colour per CDS
# @return [Array<CDS_feature>] one feature per CDS record, in iteration order
def cds_to_print(mrna,cannonical_exons:[], colors:["#a6cee3", "#1f78b4", "#b2df8a" , "#33a02c", "#fb9a99",  "#e31a1c", "#fdbf6f", "#ff7f00", "#cab2d6", "#6a3d9a"])
	features = []
	index = 0
	# 0 doubles as "not set yet": the first CDS start becomes the reference.
	reference_start = 0
	each_cds do |cds|
		_target_id, target_from, target_to = cds.get_attribute("Target").split(" ")
		color = colors[index % colors.size]
		reference_start = cds.start if reference_start == 0
		features << CDS_feature.new(
			target_from.to_i, target_to.to_i, color,
			cds.seqid, cds.start, cds.end, cds.start - reference_start)
		index += 1
	end
	features
end

#each ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/bio-pangenome/gff3_extensions.rb', line 17

# Reads @file line by line and yields one Bio::GFFbrowser::FastLineRecord per
# feature line.
#
# Blank lines and lines starting with "#" are skipped, and reading stops at a
# "##FASTA" line. The underlying handle is always closed, whether iteration
# finishes, the caller breaks out of the block, or an error is raised.
#
# @yieldparam record [Bio::GFFbrowser::FastLineRecord] the parsed line
# @return [Enumerator] when called without a block
# @raise [StandardError] whatever parsing (or the caller's block) raised; the
#   offending line is written to $stderr first and the error is re-raised
def each
	return enum_for(:each) unless block_given?
	raw = nil
	io = nil
	# NOTE(review): Kernel#open is kept for the gzip branch so existing callers
	# see the same behaviour; consider File.open if @file is always a local path.
	raw = @is_gz ? open(@file) : File.open(@file)
	io = @is_gz ? Zlib::GzipReader.new(raw) : raw
	parser = Bio::GFFbrowser::FastLineParser
	io.each_line do |line|
		line.encode!('UTF-8', 'UTF-8', :invalid => :replace)
		line.strip!
		break if line == '##FASTA'
		next if line.empty? || line.start_with?('#')
		begin
			record = Bio::GFFbrowser::FastLineRecord.new(parser.parse_line_fast(line))
			yield record
		rescue StandardError => e
			$stderr.puts "Unable to parse '#{line}'\n#{e}"
			# Re-raise the original error; `throw` would turn it into an
			# UncaughtThrowError and hide the real cause.
			raise
		end
	end
ensure
	# Closing the GzipReader also closes the file it wraps; fall back to the raw
	# handle when wrapping it failed.
	handle = io || raw
	handle.close if handle && !handle.closed?
end

#each_cds ⇒ Object



58
59
60
61
62
63
64
# File 'lib/bio-pangenome/gff3_extensions.rb', line 58

# Yields every record whose feature type is "CDS".
#
# @yieldparam record [Object] a record yielded by #each with feature "CDS"
# @return [Enumerator] over the CDS records when called without a block
def each_cds
	# The enumerator must wrap this method; pointing it at :each_mrna made the
	# block-less form iterate mRNA records instead of CDS records.
	return enum_for(:each_cds) unless block_given?
	self.each do |record|
		next unless record.feature == "CDS"
		yield record
	end
end

#each_gene ⇒ Object



42
43
44
45
46
47
48
# File 'lib/bio-pangenome/gff3_extensions.rb', line 42

# Yields every record whose feature type is "gene".
#
# @yieldparam record [Object] a record yielded by #each with feature "gene"
# @return [Enumerator] over the gene records when called without a block
def each_gene
	return enum_for(:each_gene) unless block_given?
	each { |entry| yield entry if entry.feature == "gene" }
end

#each_mrna ⇒ Object



50
51
52
53
54
55
56
# File 'lib/bio-pangenome/gff3_extensions.rb', line 50

# Yields every record whose feature type is "mRNA".
#
# @yieldparam record [Object] a record yielded by #each with feature "mRNA"
# @return [Enumerator] over the mRNA records when called without a block
def each_mrna
	return enum_for(:each_mrna) unless block_given?
	each { |entry| yield entry if entry.feature == "mRNA" }
end

#mrna_info(id) ⇒ Object



85
86
87
88
# File 'lib/bio-pangenome/gff3_extensions.rb', line 85

# Looks up the CDS statistics collected for one mRNA.
#
# calculate_mrna_stats is invoked first so @mrna_stats exists before the
# lookup.
#
# @param id [Object] key to look up in @mrna_stats
# @return [Object] whatever @mrna_stats holds (or defaults to) for +id+
def mrna_info(id)
	calculate_mrna_stats
	stats_by_mrna = @mrna_stats
	stats_by_mrna[id]
end