Class: Bio::Iprscan::Report
Overview
DESCRIPTION
Class for InterProScan reports. It is used to parse results and to reformat them from (raw|xml|txt) into (html, xml, ebihtml, txt, gff3) formats.
See ftp.ebi.ac.uk/pub/software/unix/iprscan/README.html
USAGE
# Read a merged.txt and split each entry.
Bio::Iprscan::Report.parse_txt(File.open("merged.txt")) do |report|
report.query_id
report.matches.size
report.matches.each do |match|
match.ipr_id #=> 'IPR...'
match.ipr_description
match.method
match.accession
match.description
match.match_start
match.match_end
match.evalue
end
# report.to_gff3
# report.to_html
end
Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report|
report.class #=> Bio::Iprscan::Report
end
Defined Under Namespace
Classes: Match
Constant Summary collapse
- RS =
Entry delimiter pattern.
DELIMITER = "\n\/\/\n"
Instance Attribute Summary collapse
-
#crc64 ⇒ Object
CRC64 checksum of query sequence.
-
#matches ⇒ Object
Matched InterPro motifs (an Array of Match objects, as populated by the constructor and parsers).
-
#query_id ⇒ Object
(also: #entry_id)
Query sequence name (entry_id).
-
#query_length ⇒ Object
Query sequence length.
Class Method Summary collapse
-
.parse_ptxt(io) ⇒ Object
Splits the entry stream.
-
.parse_ptxt_entry(str) ⇒ Object
Parser method for a pseudo-txt formatted entry.
-
.parse_raw(io) {|Bio::Iprscan::Report.parse_raw_entry(entry)| ... } ⇒ Object
USAGE: Bio::Iprscan::Report.parse_raw(File.open("merged.raw")) do |report| report end
-
.parse_raw_entry(str) ⇒ Object
Parser method for a raw formatted entry.
-
.parse_txt(io) ⇒ Object
Splits the entry stream.
-
.parse_txt_entry(str) ⇒ Object
Parser method for a txt formatted entry.
Instance Method Summary collapse
-
#format_raw ⇒ Object
Formats the matches as tab-delimited raw-format lines, one line per match.
-
#initialize ⇒ Report
constructor
A new instance of Report.
-
#output(format_type) ⇒ Object
Outputs InterPro matches in the given format_type.
-
#to_hash ⇒ Object
Returns a Hash (key as an InterPro ID and value as an Array of Match objects).
Constructor Details
#initialize ⇒ Report
Returns a new instance of Report.
236 237 238 239 240 241 |
# File 'lib/bio/appl/iprscan/report.rb', line 236 def initialize @query_id = nil @query_length = nil @crc64 = nil @matches = [] end |
Instance Attribute Details
#crc64 ⇒ Object
CRC64 checksum of query sequence.
59 60 61 |
# File 'lib/bio/appl/iprscan/report.rb', line 59 def crc64 @crc64 end |
#matches ⇒ Object
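Matched InterPro motifs in Hash. Each InterPro motif has :name, :definition, :accession and :motifs keys, and the :motifs key contains the motifs in an Array. Each motif has :method, :accession, :definition, :score, :location_from and :location_to keys. NOTE(review): the constructor and parsers listed on this page store an Array of Match objects in this attribute, so the Hash description above may be out of date; confirm against the Match class.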
65 66 67 |
# File 'lib/bio/appl/iprscan/report.rb', line 65 def matches @matches end |
#query_id ⇒ Object Also known as: entry_id
Query sequence name (entry_id).
52 53 54 |
# File 'lib/bio/appl/iprscan/report.rb', line 52 def query_id @query_id end |
#query_length ⇒ Object
Query sequence length.
56 57 58 |
# File 'lib/bio/appl/iprscan/report.rb', line 56 def query_length @query_length end |
Class Method Details
.parse_ptxt(io) ⇒ Object
194 195 196 197 198 |
# File 'lib/bio/appl/iprscan/report.rb', line 194 def self.parse_ptxt(io) io.each("\n\/\/\n") do |entry| yield self.parse_ptxt_entry(entry) end end |
.parse_ptxt_entry(str) ⇒ Object
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/bio/appl/iprscan/report.rb', line 209 def self.parse_ptxt_entry(str) report = self.new ipr_line = '' str.split(/\n/).each do |line| line = line.split("\t") if line.size == 2 report.query_id = line[0] report.query_length = line[1].to_i elsif line.first == '//' elsif line.first == 'InterPro' ipr_line = line else startp, endp = line[4].split("-") report.matches << Match.new(:ipr_id => ipr_line[1], :ipr_description => ipr_line[2], :method => line[0], :accession => line[1], :description => line[2], :evalue => line[3], :match_start => startp.to_i, :match_end => endp.to_i) end end report end |
.parse_raw(io) {|Bio::Iprscan::Report.parse_raw_entry(entry)| ... } ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/bio/appl/iprscan/report.rb', line 72 def self.parse_raw(io) entry = '' while line = io.gets if entry != '' and entry.split("\t").first == line.split("\t").first entry << line elsif entry != '' yield Bio::Iprscan::Report.parse_raw_entry(entry) entry = line else entry << line end end yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != '' end |
.parse_raw_entry(str) ⇒ Object
Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report object.
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
# File 'lib/bio/appl/iprscan/report.rb', line 89 def self.parse_raw_entry(str) report = self.new str.split(/\n/).each do |line| line = line.split("\t") report.matches << Match.new(:query_id => line[0], :crc64 => line[1], :query_length => line[2].to_i, :method => line[3], :accession => line[4], :description => line[5], :match_start => line[6].to_i, :match_end => line[7].to_i, :evalue => line[8], :status => line[9], :date => line[10]) if line[11] report.matches.last.ipr_id = line[11] report.matches.last.ipr_description = line[12] end report.matches.last.go_terms = line[13].scan(/(\w+ \w+\:.+? \(GO:\d+\))/).flatten if line[13] end report.query_id = report.matches.first.query_id report.query_length = report.matches.first.query_length report end |
.parse_txt(io) ⇒ Object
130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/bio/appl/iprscan/report.rb', line 130 def self.parse_txt(io) io.each("\n\nSequence") do |entry| if entry =~ /Sequence$/ entry = entry.sub(/Sequence$/, '') end unless entry =~ /^Sequence/ entry = 'Sequence' + entry end yield self.parse_txt_entry(entry) end end |
.parse_txt_entry(str) ⇒ Object
Parser method for a txt formatted entry. Returns a Bio::Iprscan::Report object.
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
# File 'lib/bio/appl/iprscan/report.rb', line 147 def self.parse_txt_entry(str) unless str =~ /^Sequence / raise ArgumentError, "Invalid format: \n\n#{str}" end header, *matches = str.split(/\n\n/) report = self.new report.query_id = if header =~ /Sequence \"(.+)\" / then $1 else '' end report.query_length = if header =~ /length: (\d+) aa./ then $1.to_i else nil end report.crc64 = if header =~ /crc64 checksum: (\S+) / then $1 else nil end ipr_line = '' go_annotation = '' matches.each do |m| m = m.split(/\n/).map {|x| x.split(/ +/) } m.each do |match| case match[0] when 'method' when /(Molecular Function|Cellular Component|Biological Process):/ go_annotation = match[0].scan(/([MCB]\w+ \w+): (\S.+?\S) \((GO:\d+)\),*/) when 'InterPro' ipr_line = match else pos_scores = match[3].scan(/(\S)\[(\d+)-(\d+)\] (\S+) */) pos_scores.each do |pos_score| report.matches << Match.new(:ipr_id => ipr_line[1], :ipr_description => ipr_line[2], :method => match[0], :accession => match[1], :description => match[2], :evalue => pos_score[3], :status => pos_score[0], :match_start => pos_score[1].to_i, :match_end => pos_score[2].to_i, :go_terms => go_annotation) end end end end return report end |
Instance Method Details
#format_raw ⇒ Object
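Returns the matches formatted as raw-format text: one line per match with tab-separated fields, lines joined by newlines.
(The original comment here was a leftover stub: def format_txt ... end.)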
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 |
# File 'lib/bio/appl/iprscan/report.rb', line 266 def format_raw @matches.map { |match| [self.query_id, self.crc64, self.query_length, match.method_name, match.accession, match.description, match.match_start, match.match_end, match.evalue, match.status, match.date, match.ipr_id, match.ipr_description, match.go_terms.map {|x| x[0] + ': ' + x[1] + ' (' + x[2] + ')' }.join(', ') ].join("\t") }.join("\n") end |
#output(format_type) ⇒ Object
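Outputs InterPro matches in the given format_type. Only 'raw' (or :raw) is supported; any other value raises NameError ("Invalid format_type.").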
245 246 247 248 249 250 251 252 |
# File 'lib/bio/appl/iprscan/report.rb', line 245 def output(format_type) case format_type when 'raw', :raw format_raw else raise NameError, "Invalid format_type." end end |
#to_hash ⇒ Object
Returns a Hash (key as an InterPro ID and value as an Array of the Match objects with that ID). The Hash is built on the first call and cached.
report.to_hash.each do |ipr_id, matches|
matches.each do |match|
match.ipr_id == ipr_id #=> true
end
end
298 299 300 301 302 303 304 305 306 307 308 309 |
# File 'lib/bio/appl/iprscan/report.rb', line 298 def to_hash unless @ipr_ids @ipr_ids = {} @matches.each_with_index do |match, i| @ipr_ids[match.ipr_id] ||= [] @ipr_ids[match.ipr_id] << match end return @ipr_ids else return @ipr_ids end end |