Module: Bio::Ngs::Cufflinks::GtfParser
- Included in:
- Gtf
- Defined in:
- lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb
Overview
TODO: use a dedicated class for each block (transcript).
Instance Attribute Summary collapse
-
#lazy ⇒ Object
Returns the value of attribute lazy.
Instance Method Summary collapse
- #annotated_isoforms ⇒ Object
- #brand_new_isoforms ⇒ Object
-
#build_idx ⇒ Object
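Builds the in-memory index (@idx): the byte length of every transcript and a map from transcript_id to its 1-based position.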
-
#count ⇒ Object
Returns the number of transcripts, counted by iterating with each_transcript.
- #coverage_gt(size) ⇒ Object
-
#dump_idx(fn = nil) ⇒ Object
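Serializes the index with Marshal to fn (default: "<source path>.idx"), building the index first if needed; returns the file name.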
- #each_transcript(&block) ⇒ Object
- #get_transcript(n = 1) ⇒ Object (also: #[])
-
#index ⇒ Object
Returns the current index (@idx).
- #is_lazy? ⇒ Boolean
- #length_gt(length) ⇒ Object
-
#load_idx ⇒ Object
Loads the index from "<source path>.idx" if that file exists; otherwise builds the index and dumps it.
- #mono_exon ⇒ Object
-
#multi_exon_with_lengh_and_coverage(length, coverage) ⇒ Object
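Selects multi-exon transcripts whose size is greater than length and whose :cov attribute is greater than coverage.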
- #multi_exons ⇒ Object
- #new_isoforms ⇒ Object
- #not_lazy ⇒ Object
-
#read_transcript(n = 1) ⇒ Object
Indexing starts from 1; n can be either a number or the name of a transcript.
- #save(filename = nil) ⇒ Object
- #select(&block) ⇒ Object
-
#set_lazy ⇒ Object
Enables lazy mode (sets @lazy to true).
-
#to_bed(only_exons = true, &block) ⇒ Object
Calls the given block with each transcript and its BED representation.
- #to_gff3(path = ".") ⇒ Object
Instance Attribute Details
#lazy ⇒ Object
Returns the value of attribute lazy.
11 12 13 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 11
#
# Reader for the lazy flag.
#
# @return [Object] the current value of @lazy (nil until it has been set)
def lazy
  @lazy
end
Instance Method Details
#annotated_isoforms ⇒ Object
98 99 100 101 102 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 98
#
# Filters the transcripts, keeping those for which
# Transcript#annotated_isoform? is truthy.
#
# @return [Object] whatever #select returns for this predicate
def annotated_isoforms
  select { |candidate| candidate.annotated_isoform? }
end
#brand_new_isoforms ⇒ Object
86 87 88 89 90 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 86
#
# Filters the transcripts, keeping those for which
# Transcript#brand_new_isoform? is truthy.
#
# @return [Object] whatever #select returns for this predicate
def brand_new_isoforms
  select { |candidate| candidate.brand_new_isoform? }
end
#build_idx ⇒ Object
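Builds the in-memory index (@idx): the byte length of every transcript and a map from transcript_id to its 1-based position.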
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 156
#
# Walks every transcript once and records, in @idx:
#   :transcripts - array of each transcript's byte_length, in file order
#   :names       - hash from the transcript_id attribute to the 1-based
#                  position of that transcript
#   :exons       - created as an empty array (not populated here)
#
# @return [Hash] the freshly built index (also stored in @idx)
def build_idx
  # Unknown keys auto-create an empty array, so merely reading a key is
  # enough to initialise it.
  index = Hash.new { |hash, key| hash[key] = [] }
  index[:transcripts]
  index[:names] = {}
  index[:exons]

  each_transcript do |transcript, _line_number|
    lengths = index[:transcripts]
    lengths << transcript.byte_length
    # The position is taken after the push, hence 1-based.
    index[:names][transcript.attributes[:transcript_id]] = lengths.length
  end

  @idx = index
end
#count ⇒ Object
Returns the number of transcripts, counted by iterating with each_transcript.
148 149 150 151 152 153 154 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 148
#
# Counts the transcripts by iterating over them with #each_transcript.
#
# @return [Integer] number of times #each_transcript yielded
def count
  total = 0
  each_transcript { total += 1 }
  total
end
#coverage_gt(size) ⇒ Object
104 105 106 107 108 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 104
#
# Filters the transcripts, keeping those whose :cov attribute is strictly
# greater than +size+.
#
# @param size [Object] threshold compared against attributes[:cov] with >
# @return [Object] whatever #select returns for this predicate
def coverage_gt(size)
  select { |candidate| candidate.attributes[:cov] > size }
end
#dump_idx(fn = nil) ⇒ Object
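Serializes the index with Marshal to fn (default: "<source path>.idx"), building the index first if needed; returns the file name.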
174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 174
#
# Serializes the transcript index to disk with Marshal.
#
# The index is built first when none is available. The file is opened in
# binary mode because a Marshal stream is binary data.
#
# @param fn [String, nil] destination file; defaults to "<source path>.idx"
# @return [String] the name of the file that was written
def dump_idx(fn = nil)
  fn ||= "#{source.path}.idx"
  build_idx if @idx.nil?

  # Marshal cannot dump a Hash that carries a default proc. Assigning a
  # default value (even nil) drops the proc, so the hash becomes dumpable.
  # The previous default value is kept under :default_hash, which is the
  # key load_idx reads back.
  @idx[:default_hash] = @idx.default
  @idx.default = nil

  File.open(fn, "wb") { |f| Marshal.dump(@idx, f) }

  @idx.default = @idx[:default_hash]
  fn
end
#each_transcript(&block) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 13
#
# Iterates over the transcripts of the underlying file handle (@fh).
#
# Without pending lazy filters (@blocks nil or empty) the file is rewound and
# scanned line by line: a line containing "\ttranscript\t" starts a new
# transcript and a line containing "\texon\t" is appended to the current one.
# The block is called once per transcript, including the last one in the
# file, with the transcript and the line number of the line that follows it.
#
# NOTE: the same Transcript instance is reused (cleared and refilled) for
# every iteration, so callers must copy whatever they want to keep.
#
# With pending lazy filters, lazy mode is switched off temporarily, the
# filters are applied through #select (a transcript passes only when every
# filter returns a truthy value), and iteration is delegated to the result.
# The pending filters are consumed by this call.
#
# @yield [transcript, lineno]
# @return [Object] @fh in the non-lazy case; otherwise the result of the
#   delegated iteration, or nil when no transcript passed the filters
def each_transcript(&block)
  if @blocks.nil? || @blocks.empty?
    @fh.rewind
    # An empty file has no transcripts; readline would raise EOFError.
    return @fh if @fh.eof?

    transcript = Transcript.new
    # The first line is taken to be a transcript line.
    transcript.tra = @fh.readline
    @fh.each_line do |line|
      if line =~ /\ttranscript\t/
        # A new transcript line closes the previous transcript.
        block.call(transcript, @fh.lineno)
        transcript.clear
        transcript.tra = line
      elsif line =~ /\texon\t/
        transcript.exons << line
      end
    end
    # Flush the final transcript: no further transcript line follows it to
    # trigger the call above. lineno + 1 keeps the "line after the
    # transcript" convention used for the earlier calls.
    block.call(transcript, @fh.lineno + 1)
    @fh
  else # lazy
    not_lazy
    blocks_to_run = @blocks
    @blocks = []
    begin
      result = select do |transcript|
        blocks_to_run.all? { |b| b.call(transcript) }
      end
    ensure
      # Restore lazy mode even if a filter raised.
      set_lazy
    end
    # select returns nil when nothing matched.
    result.send(:each_transcript, &block) if result
  end
end
#get_transcript(n = 1) ⇒ Object Also known as: []
222 223 224 225 226 227 228 229 230 231 232 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 222
#
# Reads the raw text of transcript +n+ with #read_transcript and wraps it in
# a Bio::Ngs::Cufflinks::Transcript: the first line becomes the transcript
# line and every following line becomes an exon line (each re-terminated
# with "\n").
#
# @param n [Object] passed through unchanged to #read_transcript
# @return [Bio::Ngs::Cufflinks::Transcript, nil] nil when #read_transcript
#   returns a falsy value
def get_transcript(n = 1)
  raw = read_transcript(n)
  return nil unless raw

  header, *exon_lines = raw.split("\n")
  transcript = Bio::Ngs::Cufflinks::Transcript.new
  transcript.tra = header + "\n"
  transcript.exons = exon_lines.map { |exon| exon + "\n" }
  transcript
end
#index ⇒ Object
Returns the current index (@idx).
197 198 199 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 197
#
# Reader for the transcript index.
#
# @return [Object] the current value of @idx (nil until an index has been
#   built or loaded)
def index
  @idx
end
#is_lazy? ⇒ Boolean
130 131 132 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 130
#
# Tells whether lazy mode is on.
#
# @return [Object] the raw value of @lazy (not coerced to true/false)
def is_lazy?
  @lazy
end
#length_gt(length) ⇒ Object
79 80 81 82 83 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 79
#
# Filters the transcripts, keeping those whose size is strictly greater than
# +length+.
#
# @param length [Object] threshold compared against Transcript#size with >
# @return [Object] whatever #select returns for this predicate
def length_gt(length)
  select { |candidate| candidate.size > length }
end
#load_idx ⇒ Object
Loads the index from "<source path>.idx" if that file exists; otherwise builds the index and dumps it.
187 188 189 190 191 192 193 194 195 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 187
#
# Makes the transcript index available in @idx.
#
# When "<source path>.idx" exists it is read back with Marshal; otherwise the
# index is built from the source and dumped to disk for later reuse.
#
# SECURITY NOTE: Marshal.load can instantiate arbitrary objects. Only load
# index files that this code wrote itself; never point it at untrusted data.
#
# @return [Object] not meaningful; callers should read @idx / #index
def load_idx
  idx_path = "#{source.path}.idx"
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  if File.exist?(idx_path)
    # Block form closes the handle; "rb" because the stream is binary.
    @idx = File.open(idx_path, "rb") { |f| Marshal.load(f) }
    @idx.default = @idx[:default_hash]
  else
    build_idx
    dump_idx
  end
end
#mono_exon ⇒ Object
72 73 74 75 76 77 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 72
#
# Filters the transcripts, keeping those for which Transcript#mono_exon? is
# truthy (per the original note: a transcript line plus one exon line).
#
# @return [Object] whatever #select returns for this predicate
def mono_exon
  select { |candidate| candidate.mono_exon? }
end
#multi_exon_with_lengh_and_coverage(length, coverage) ⇒ Object
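Selects multi-exon transcripts whose size is greater than length and whose :cov attribute is greater than coverage.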
59 60 61 62 63 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 59
#
# Filters the transcripts, keeping those that satisfy all three conditions,
# checked in this order with short-circuiting:
#   1. Transcript#multi_exons? is truthy
#   2. Transcript#size is strictly greater than +length+
#   3. the :cov attribute is strictly greater than +coverage+
#
# @param length [Object] size threshold
# @param coverage [Object] coverage threshold
# @return [Object] whatever #select returns for this predicate
def multi_exon_with_lengh_and_coverage(length, coverage)
  select do |candidate|
    candidate.multi_exons? &&
      (candidate.size > length) &&
      (candidate.attributes[:cov] > coverage)
  end
end
#multi_exons ⇒ Object
65 66 67 68 69 70 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 65
#
# Filters the transcripts, keeping those for which Transcript#multi_exons?
# is truthy.
#
# @return [Object] whatever #select returns for this predicate
def multi_exons
  select { |candidate| candidate.multi_exons? }
end
#new_isoforms ⇒ Object
92 93 94 95 96 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 92
#
# Filters the transcripts, keeping those for which Transcript#new_isoform?
# is truthy.
#
# @return [Object] whatever #select returns for this predicate
def new_isoforms
  select { |candidate| candidate.new_isoform? }
end
#not_lazy ⇒ Object
134 135 136 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 134
#
# Switches lazy mode off.
#
# @return [false] the value assigned to @lazy
def not_lazy
  @lazy = false
end
#read_transcript(n = 1) ⇒ Object
Indexing starts from 1; n can be either a number or the name of a transcript.
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 203
#
# Reads the raw text of one transcript straight from +source+ using the
# byte-length index.
#
# Positions start from 1. +n+ may be a number (or numeric string) or a
# transcript name; names are resolved through @idx[:names].
#
# The byte offset of transcript k is the sum of the lengths of the k-1
# transcripts that precede it.
#
# @param n [Integer, String] 1-based position or transcript name
# @return [String, nil] the transcript's text, or nil when the name is
#   unknown or the position is outside 1..number_of_transcripts
def read_transcript(n = 1)
  load_idx unless defined?(@idx)

  unless n.to_s.is_numeric?
    position = @idx[:names][n]
    # An unknown name must stop here: recursing with nil would never end.
    return position.nil? ? nil : read_transcript(position)
  end

  position = n.to_i
  lengths = @idx[:transcripts]
  # Out-of-range positions would otherwise read with a nil length (i.e. to
  # end of file) or wrap around to the last transcript.
  return nil if position < 1 || position > lengths.length

  source.seek(lengths.first(position - 1).sum)
  source.read(lengths[position - 1])
end
#save(filename = nil) ⇒ Object
138 139 140 141 142 143 144 145 146 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 138
#
# Writes every transcript yielded by #each_transcript to a file.
#
# NOTE: the index is deliberately not dumped alongside the file; the original
# author marked that as buggy because it would save the old index when the
# user had called a select.
#
# @param filename [String, nil] destination; defaults to "<@fh path>.gtf"
# @return [Object] the value of the File.open block
def save(filename = nil)
  target = filename || "#{@fh.path}.gtf"
  File.open(target, 'w') do |out|
    each_transcript { |transcript| out.write transcript }
  end
end
#select(&block) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 42
#
# Filters transcripts with the given predicate block.
#
# In lazy mode the block is only queued in @blocks and self is returned, so
# calls can be chained; the queued blocks are applied by #each_transcript.
#
# Otherwise every transcript for which the block returns a truthy value is
# written to a Tempfile named "transcripts" and a new Gtf is created on that
# file.
#
# TODO (from the original author): find out how to concatenate multiple
# selections.
#
# @yield [transcript] predicate deciding whether to keep the transcript
# @return [self, Gtf, nil] self in lazy mode; otherwise a Gtf over the
#   matches, or nil when the temporary file is empty
def select(&block)
  if is_lazy?
    (@blocks ||= []) << block
    return self
  end

  matches = Tempfile.new("transcripts")
  each_transcript do |transcript|
    matches.write transcript.to_s if block.call(transcript)
  end
  matches.size == 0 ? nil : Gtf.new(matches.path)
end
#set_lazy ⇒ Object
Enables lazy mode (sets @lazy to true).
126 127 128 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 126
#
# Switches lazy mode on.
#
# @return [true] the value assigned to @lazy
def set_lazy
  @lazy = true
end
#to_bed(only_exons = true, &block) ⇒ Object
Calls the given block with each transcript and its BED representation.
120 121 122 123 124 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 120
#
# Hands every transcript, together with the result of
# Transcript#to_bed(only_exons), to the given block.
#
# @param only_exons [Object] forwarded unchanged to Transcript#to_bed
# @yield [transcript, bed] the transcript and its BED conversion
# @return [Object] whatever #each_transcript returns
def to_bed(only_exons = true, &block)
  each_transcript do |transcript|
    block.call(transcript, transcript.to_bed(only_exons))
  end
end
#to_gff3(path = ".") ⇒ Object
110 111 112 113 114 115 116 117 118 |
# File 'lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb', line 110
#
# Converts "<path>/transcripts.gtf" to "<path>/transcripts.gff3" by running
# GffRead.
#
# The input and output are both addressed inside +path+, so the file that is
# checked for existence is the file that gets converted.
#
# @param path [String] directory containing transcripts.gtf (default ".")
# @return [Object] whatever GffRead#run returns
# @raise [ArgumentError] when transcripts.gtf is not present in +path+
def to_gff3(path = ".")
  gtf_file = File.join(path, "transcripts.gtf")
  # File.exists? was removed in Ruby 3.2; File.exist? is the supported name.
  unless File.exist?(gtf_file)
    raise ArgumentError, "transcripts.gtf doesn't exists in #{path}"
  end

  gffread = GffRead.new
  gffread.params = { output: File.join(path, "transcripts.gff3") }
  gffread.run :arguments => [gtf_file], :separator => ''
end