Class: Ms::Sequest::Srf
- Inherits:
-
Object
- Object
- Ms::Sequest::Srf
- Defined in:
- lib/ms/sequest/srf.rb,
lib/ms/sequest/srf/sqt.rb,
lib/ms/sequest/srf/search.rb
Defined Under Namespace
Modules: Search, Sqt Classes: DTA, DTAGen, Header, NoSequestParamsError, Out
Instance Attribute Summary collapse
-
#base_name ⇒ Object
Returns the value of attribute base_name.
-
#dta_files ⇒ Object
Returns the value of attribute dta_files.
-
#filtered_by_precursor_mass_tolerance ⇒ Object
a boolean to indicate if the results have been filtered by the sequest.params precursor mass tolerance.
-
#header ⇒ Object
Returns the value of attribute header.
-
#index ⇒ Object
a parallel array to dta_files and out_files where each entry is: [first_scan, last_scan, charge].
-
#out_files ⇒ Object
Returns the value of attribute out_files.
-
#params ⇒ Object
Returns the value of attribute params.
-
#version ⇒ Object
a String: 3.5, 3.3 or 3.2.
Class Method Summary collapse
-
.get_sequest_params(filename) ⇒ Object
returns a Sequest::Params object or nil if none.
Instance Method Summary collapse
- #dta_start_byte ⇒ Object
-
#filter_by_precursor_mass_tolerance! ⇒ Object
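Filters each out_file’s hits by the sequest.params precursor mass tolerance, recalculating deltacn values where hits were removed; returns self.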
-
#from_file(filename, opts) ⇒ Object
Returns self. opts are the same as for ‘new’.
-
#initialize(filename = nil, opts = {}) ⇒ Srf
constructor
opts: :filter_by_precursor_mass_tolerance => true | false (default true); this will filter by the sequest params precursor tolerance, as is typically done by Bioworks.
- #protein_class ⇒ Object
- #read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0) ⇒ Object
-
#read_dta_files(fh, num_files, unpack_35) ⇒ Object
returns an array of dta_files.
-
#read_out_files(fh, number_files, unpack_35, dup_refs_gt_0) ⇒ Object
filehandle (fh) must be at the start of the outfiles.
-
#read_scan_index(fh, num) ⇒ Object
returns an index where each entry is [first_scan, last_scan, charge].
Methods included from Search
Methods included from Sqt
Constructor Details
#initialize(filename = nil, opts = {}) ⇒ Srf
opts:
:filter_by_precursor_mass_tolerance => true | false (default true)
# this will filter by the sequest params prec tolerance as is
# typically done by Bioworks.
:link_protein_hits => true | false (default true)
# if true, generates the @prots attribute for the :prots method
# and creates one protein per reference that is linked to each
# relevant peptide hit.
# if false, each protein for each peptide hit is a unique object
# and the :prots method returns nil. If you are merging multiple
# searches then you probably want to set this to false to avoid
# recalculation.
:read_pephits => true | false (default true)
# will attempt to read peptide hit information (equivalent to .out
# files), otherwise, just reads the dta information.
:params => <path/to/sequest.params> Some srf files do not include
their sequest params file - include it here if necessary.
97 98 99 100 101 102 103 104 105 |
# File 'lib/ms/sequest/srf.rb', line 97

# Creates an Srf with empty peptide, dta and out collections and, when a
# filename is given, loads that file straight away via #from_file.
#
# @param filename [String, nil] path to an .srf file; nothing is read when
#   this is nil (or false)
# @param opts [Hash] passed through unchanged to #from_file
def initialize(filename = nil, opts = {})
  @out_files = []
  @dta_files = []
  @peps = []
  from_file(filename, opts) if filename
end
Instance Attribute Details
#base_name ⇒ Object
Returns the value of attribute base_name.
40 41 42 |
# File 'lib/ms/sequest/srf.rb', line 40

# Reader for the base_name attribute.
#
# @return [Object] the current value of @base_name (nil until assigned)
def base_name
  @base_name
end
#dta_files ⇒ Object
Returns the value of attribute dta_files.
34 35 36 |
# File 'lib/ms/sequest/srf.rb', line 34

# Reader for the dta_files attribute.
#
# @return [Object] the current value of @dta_files (nil until assigned)
def dta_files
  @dta_files
end
#filtered_by_precursor_mass_tolerance ⇒ Object
a boolean to indicate if the results have been filtered by the sequest.params precursor mass tolerance
44 45 46 |
# File 'lib/ms/sequest/srf.rb', line 44

# Flag telling whether the results have been filtered by the sequest.params
# precursor mass tolerance.
#
# @return [Object] the current value of @filtered_by_precursor_mass_tolerance
#   (nil until assigned)
def filtered_by_precursor_mass_tolerance
  @filtered_by_precursor_mass_tolerance
end
#header ⇒ Object
Returns the value of attribute header.
33 34 35 |
# File 'lib/ms/sequest/srf.rb', line 33

# Reader for the header attribute.
#
# @return [Object] the current value of @header (nil until assigned)
def header
  @header
end
#index ⇒ Object
a parallel array to dta_files and out_files where each entry is:
[first_scan, last_scan, charge]
39 40 41 |
# File 'lib/ms/sequest/srf.rb', line 39

# Reader for the scan index: an array parallel to dta_files and out_files
# whose entries are [first_scan, last_scan, charge].
#
# @return [Object] the current value of @index (nil until assigned)
def index
  @index
end
#out_files ⇒ Object
Returns the value of attribute out_files.
35 36 37 |
# File 'lib/ms/sequest/srf.rb', line 35

# Reader for the out_files attribute.
#
# @return [Object] the current value of @out_files (nil until assigned)
def out_files
  @out_files
end
#params ⇒ Object
Returns the value of attribute params.
36 37 38 |
# File 'lib/ms/sequest/srf.rb', line 36

# Reader for the params attribute.
#
# @return [Object] the current value of @params (nil until assigned)
def params
  @params
end
#version ⇒ Object
a String: 3.5, 3.3 or 3.2
31 32 33 |
# File 'lib/ms/sequest/srf.rb', line 31

# Reader for the srf format version, a String: 3.5, 3.3 or 3.2.
#
# @return [Object] the current value of @version (nil until assigned)
def version
  @version
end
Class Method Details
.get_sequest_params(filename) ⇒ Object
returns a Sequest::Params object or nil if none
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/ms/sequest/srf.rb', line 51

# Looks for the sequest params section embedded in an .srf file.
#
# Only the second half of the file is scanned: the params section is
# expected to sit there, so the first half is never read. The last
# '[SEQUEST]' marker found in that half is taken as the section start.
#
# @param filename [String] path to the .srf file
# @return [Object, nil] whatever Ms::Sequest::Params#parse_io returns when
#   handed the file positioned at the marker, or nil when no marker is
#   found in the second half
def self.get_sequest_params(filename)
  File.open(filename, 'rb') do |io|
    midpoint = io.stat.size / 2
    io.seek(midpoint)
    marker_offset = io.read.rindex('[SEQUEST]')
    next nil unless marker_offset # marker absent: no params in this file

    # marker_offset is relative to the midpoint, so translate it back into
    # an absolute file position before parsing.
    io.seek(midpoint + marker_offset)
    Ms::Sequest::Params.new.parse_io(io)
  end
end
Instance Method Details
#dta_start_byte ⇒ Object
68 69 70 71 72 73 74 |
# File 'lib/ms/sequest/srf.rb', line 68

# Byte offset used to position the file handle before dta entries are read,
# chosen from the srf version stored in @version.
#
# @return [Integer, nil] 3260 for '3.2', 3644 for '3.3' and '3.5', nil for
#   any other version value
def dta_start_byte
  case @version
  when '3.2' then 3260
  when '3.3', '3.5' then 3644
  end
end
#filter_by_precursor_mass_tolerance! ⇒ Object
1. updates the out_file’s list of hits based on passing peptides (but not the original hit id; rank is implicit in array ordering)
2. recalculates deltacn values completely if the number of hits changed (does not touch deltacn orig)
This can spoil proper protein -> peptide linkages. Ms::Id::Search.merge! should be run after this method to ensure correct protein -> peptide linkages.
116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/ms/sequest/srf.rb', line 116

# Removes, in place, every peptide hit whose precursor mass error exceeds
# the tolerance given in the sequest params, and marks this object as
# filtered.
#
# The error measure depends on params.peptide_mass_units:
#   '0' - hit.deltamass compared against the tolerance as given
#   '1' - hit.deltamass compared against tolerance / 1000
#   anything else (including '2') - hit.ppm compared against the tolerance
#
# For each out_file that lost hits, the hit list is reassigned, deltacn
# values are recomputed via Out::Pep.update_deltacns_from_xcorr, and
# num_hits is refreshed. Out files that kept all hits are left untouched.
#
# This can spoil protein -> peptide linkages; Ms::Id::Search.merge! should
# be run afterwards to restore them.
#
# @return [self]
def filter_by_precursor_mass_tolerance!
  tolerance = params.peptide_mass_tolerance.to_f
  units = params.peptide_mass_units

  # Both values are constant for the whole run, so work them out once
  # instead of per hit.
  use_deltamass = (units == '0' || units == '1')
  limit = units == '1' ? tolerance / 1000 : tolerance

  self.filtered_by_precursor_mass_tolerance = true

  out_files.each do |out_file|
    hits = out_file.hits
    before = hits.size
    hits.reject! do |pep|
      mass_error = use_deltamass ? pep.deltamass : pep.ppm
      mass_error.abs > limit
    end
    next if hits.size == before # nothing removed, nothing to refresh

    # reject! already mutated the list; the reassignment is kept in case
    # the writer does more than store the reference.
    out_file.hits = hits
    Ms::Sequest::Srf::Out::Pep.update_deltacns_from_xcorr(hits)
    out_file.num_hits = hits.size
  end
  self
end
#from_file(filename, opts) ⇒ Object
Returns self. opts are the same as for ‘new’.
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 |
# File 'lib/ms/sequest/srf.rb', line 172

# Loads an .srf file into this object and returns self.
#
# Reads the header, the dta entries, (optionally) the out entries, the
# trailing sequest params section and the scan index. For non-combined
# files read with peptide hits, every hit is then annotated with base name,
# scan range, charge, deltamass, ppm and a back-link to this object.
#
# @param filename [String] path to the .srf file
# @param opts [Hash] merged over these defaults:
#   :filter_by_precursor_mass_tolerance => true
#     when true (and params are available) hits are filtered with
#     #filter_by_precursor_mass_tolerance!; when false no filtering is done
#   :link_protein_hits => true
#     when true, merge! is run to build @peps / @prots
#   :read_pephits => true
#     when false only dta information is read (not supported for combined
#     files)
#   :params => path to a sequest.params file, for srf files that do not
#     embed one
# @return [self]
# @raise [NotImplementedError] for combined files when :read_pephits is false
# @raise [NoSequestParamsError] when peptide hits are requested from a
#   non-combined file but no sequest params could be found
def from_file(filename, opts)
  opts = { :filter_by_precursor_mass_tolerance => true, :link_protein_hits => true, :read_pephits => true}.merge(opts)

  @params =
    if opts[:params]
      Ms::Sequest::Params.new(opts[:params])
    else
      Ms::Sequest::Srf.get_sequest_params(filename)
    end

  dup_references = 0
  dup_refs_gt_0 = false
  if @params
    dup_references = @params.print_duplicate_references.to_i
    if dup_references == 0
      # warn %Q{
      #*****************************************************************************
      #WARNING: This srf file lists only 1 protein per peptide! (based on the
      #print_duplicate_references parameter in the sequest.params file used in its
      #creation) So, downstream output will likewise only contain a single protein
      #for each peptide hit. In many instances this is OK since downstream programs
      #will recalculate protein-to-peptide linkages from the database file anyway.
      #For complete protein lists per peptide hit, .srf files must be created with
      #print_duplicate_references > 0. HINT: to capture all duplicate references,
      #set the sequest parameter 'print_duplicate_references' to 100 or greater.
      #*****************************************************************************
      # }
    else
      dup_refs_gt_0 = true
    end
  end

  File.open(filename, 'rb') do |fh|
    @header = Ms::Sequest::Srf::Header.new.from_io(fh)
    @version = @header.version

    unpack_35 = case @version
                when '3.2'
                  false
                when '3.3'
                  false
                when '3.5'
                  true
                end

    if @header.combined
      @base_name = File.basename(filename, '.*')
      # I'm not sure why this is the case, but the reported number is too
      # big by one on the 2 files I've seen so far, so we will correct it here!
      @header.dta_gen.num_dta_files = @header.dta_gen.num_dta_files - 1
      if opts[:read_pephits] == false
        raise NotImplementedError, "on combined files must read everything right now!"
      end
      (@dta_files, @out_files) = read_dta_and_out_interleaved(fh, @header.num_dta_files, unpack_35, dup_refs_gt_0)
    else
      # NOTE(review): raises NoMethodError if raw_filename has no path
      # separator or does not end in upper-case ".RAW" - confirm whether
      # such headers occur.
      @base_name = @header.raw_filename.scan(/[\\\/]([^\\\/]+)\.RAW$/).first.first
      @dta_files = read_dta_files(fh, @header.num_dta_files, unpack_35)
      if opts[:read_pephits]
        # need the params file to know if the duplicate_references is set > 0
        raise NoSequestParamsError, "no sequest params info in srf file!\npass in path to sequest.params file" if @params.nil?
        @out_files = read_out_files(fh,@header.num_dta_files, unpack_35, dup_refs_gt_0)
        if fh.eof?
          #warn "FILE: '#{filename}' appears to be an abortive run (no params in srf file)\nstill continuing..."
          @params = nil
          @index = []
        end
      end
    end

    start_pos_in_case = fh.pos
    # NOTE(review): this replaces any params loaded from opts[:params]
    # with whatever is parsed from the file - confirm that is intended.
    @params = Ms::Sequest::Params.new.parse_io(fh)

    if @params.nil?
      fh.pos = start_pos_in_case
      # seek to the index
      fh.scanf "\000\000\000\000" do |m|
        puts "MATCHING NULLS: "
        p m
      end
      warn "no params file, no index, corrupt file"
    else # we have a params file
      # This is very sensitive to the grab_params method in sequest params
      fh.read(12) ## gap between last params entry and index
    end
    @index = read_scan_index(fh,@header.num_dta_files)
  end

  ### UPDATE SOME THINGS:
  # give each hit a base_name, first_scan, last_scan
  if opts[:read_pephits] && !@header.combined
    @index.each_with_index do |ind,i|
      mass_measured = @dta_files[i][0]
      @out_files[i][0,3] = *ind
      pep_hits = @out_files[i][6]
      @peps.push( *pep_hits )
      pep_hits.each do |pep_hit|
        pep_hit[15,4] = @base_name, *ind
        # add the deltamass
        pep_hit[12] = pep_hit[0] - mass_measured # real - measured (deltamass)
        pep_hit[13] = 1.0e6 * pep_hit[12].abs / mass_measured ## ppm
        pep_hit[19] = self ## link with the srf object
      end
    end

    # Honour the documented option: previously the filter ran whenever
    # params were present, even if the caller passed false.
    filter_by_precursor_mass_tolerance! if params && opts[:filter_by_precursor_mass_tolerance]

    if opts[:link_protein_hits]
      (@peps, @prots) = merge!([peps]) do |_prot, _peps|
        Ms::Sequest::Srf::Out::Prot.new(_prot.reference, _peps)
      end
    end
  end

  self
end
#protein_class ⇒ Object
46 47 48 |
# File 'lib/ms/sequest/srf.rb', line 46

# Names the class used to represent proteins for this search type.
#
# @return [Class] Ms::Sequest::Srf::Out::Prot
def protein_class
  Ms::Sequest::Srf::Out::Prot
end
#read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0) ⇒ Object
155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/ms/sequest/srf.rb', line 155

# Reads num_files (dta, out) pairs that are stored alternately in the file:
# one DTA entry, then its Out entry, and so on.
#
# The handle is first moved to #dta_start_byte.
#
# @param fh [IO] open srf file handle; its position is changed
# @param num_files [Integer] number of dta/out pairs to read
# @param unpack_35 [Object] passed through to DTA#from_io and Out#from_io
# @param dup_refs_gt_0 [Object] passed through to Out#from_io
# @return [Array(Array, Array)] [dta_files, out_files], each num_files long
def read_dta_and_out_interleaved(fh, num_files, unpack_35, dup_refs_gt_0)
  fh.pos = dta_start_byte
  dtas = []
  outs = []
  num_files.times do
    dtas << Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
    outs << Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
  end
  [dtas, outs]
end
#read_dta_files(fh, num_files, unpack_35) ⇒ Object
returns an array of dta_files
303 304 305 306 307 308 309 310 311 312 |
# File 'lib/ms/sequest/srf.rb', line 303

# Reads num_files consecutive DTA entries and returns them as an array.
#
# The handle is first moved to #dta_start_byte; afterwards it is left just
# past the last DTA entry read.
#
# The count now comes from the num_files argument. Previously the argument
# only sized the result array while the loop ran header.num_dta_files
# times, so the two could disagree.
#
# @param fh [IO] open srf file handle; its position is changed
# @param num_files [Integer] number of DTA entries to read
# @param unpack_35 [Object] passed through to DTA#from_io
# @return [Array] the DTA entries, in file order
def read_dta_files(fh, num_files, unpack_35)
  fh.pos = dta_start_byte
  Array.new(num_files) do
    Ms::Sequest::Srf::DTA.new.from_io(fh, unpack_35)
  end
end
#read_out_files(fh, number_files, unpack_35, dup_refs_gt_0) ⇒ Object
filehandle (fh) must be at the start of the outfiles. ‘read_dta_files’ will put the fh there.
316 317 318 319 320 321 322 |
# File 'lib/ms/sequest/srf.rb', line 316

# Reads number_files consecutive Out entries and returns them as an array.
#
# The handle must already be at the start of the out entries;
# 'read_dta_files' leaves it there.
#
# The count now comes from the number_files argument. Previously the
# argument only sized the result array while the loop ran
# header.num_dta_files times, so the two could disagree.
#
# @param fh [IO] open srf file handle positioned at the first Out entry
# @param number_files [Integer] number of Out entries to read
# @param unpack_35 [Object] passed through to Out#from_io
# @param dup_refs_gt_0 [Object] passed through to Out#from_io
# @return [Array] the Out entries, in file order
def read_out_files(fh, number_files, unpack_35, dup_refs_gt_0)
  Array.new(number_files) do
    Ms::Sequest::Srf::Out.new.from_io(fh, unpack_35, dup_refs_gt_0)
  end
end
#read_scan_index(fh, num) ⇒ Object
returns an index where each entry is [first_scan, last_scan, charge]
289 290 291 292 293 294 295 296 297 298 299 300 |
# File 'lib/ms/sequest/srf.rb', line 289

# Reads the scan index: num fixed-size records, each decoded into
# [first_scan, last_scan, charge].
#
# Every record occupies 24 bytes in the file; only the leading three
# native unsigned ints (unpack format 'III') are decoded.
#
# @param fh [IO] handle positioned at the first index record
# @param num [Integer] number of records to read
# @return [Array<Array>] one three-element array per record
def read_scan_index(fh, num)
  record_size = 24
  # One reusable receive buffer; IO#read overwrites it on every call.
  buffer = '0' * record_size
  Array.new(num) do
    fh.read(record_size, buffer)
    buffer.unpack('III')
  end
end