Class: Ms::Mascot::Dat::Archive

Inherits:
ExternalArchive
  • Object
show all
Includes:
Ms::Mascot::Dat, Utils
Defined in:
lib/ms/mascot/dat/archive.rb

Overview

Provides access to a Mascot dat file.

Defined Under Namespace

Modules: Utils

Constant Summary

Constants included from Ms::Mascot::Dat

CONTENT_TYPE_CLASSES

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils

content_type_class, parse_content_type, parse_metadata

Methods included from Ms::Mascot::Dat

open

Constructor Details

#initialize(io = nil, io_index = nil) ⇒ Archive

Returns a new instance of Archive.



98
99
100
101
102
# File 'lib/ms/mascot/dat/archive.rb', line 98

# Creates a new Archive. The superclass is initialized with io, the
# metadata for the dat file is obtained from parse_metadata(io), and the
# cache of section names starts out empty.
#
# NOTE(review): io_index is accepted but is not forwarded to super --
# confirm whether that is intentional.
def initialize(io=nil, io_index=nil)
  super(io)
  # metadata must be the parsed result (boundary reads its :boundary
  # key), not the raw io object
  @metadata = parse_metadata(io)
  @section_names = []
end

Instance Attribute Details

#metadata ⇒ Object (readonly)

A hash of metadata associated with this dat file.



96
97
98
# File 'lib/ms/mascot/dat/archive.rb', line 96

# Returns the metadata associated with this dat file, as stored in
# @metadata by the constructor.
def metadata
  @metadata
end

Instance Method Details

#boundary ⇒ Object

The boundary separating sections, typically ‘--gc0p4Jq0M2Yt08jU534c0p’.



105
106
107
# File 'lib/ms/mascot/dat/archive.rb', line 105

# Returns the string that separates sections: two dashes followed by the
# :boundary value recorded in the metadata.
def boundary
  # read the :boundary key from the metadata; interpolating a bare
  # [:boundary] array literal would produce the text "--[:boundary]"
  "--#{@metadata[:boundary]}"
end

#each_peptide_hit(opts = {}) ⇒ Object

By default, yields the top PeptideHit object per query. opts may be:

:by => :top
  :top     top ranked hit (default)
  :groups  an array of hits
  :all     each peptide hit (all ranks)

:yield_nil => true
  true     yields nil (or an empty group) when a query had no peptide hit (default)
  false    queries with no peptide hit (or group) are not yielded
:with_query => false
  false    yields just the peptide hits/groups (default)
  true     yields the peptide_hit/group and the associated query


205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
# File 'lib/ms/mascot/dat/archive.rb', line 205

# Walks the queries from 1 up to nqueries and yields peptide hits taken
# from the 'peptides' section. Recognized opts:
#
#   :by          :top    yield peptides.peptide_hit(n) (default)
#                :groups yield the array from peptides.peptide_hits(n)
#                        after its leading (index 0) element is removed
#                :all    yield each element of that array in turn
#   :yield_nil   true    queries without a hit are still yielded (default)
#                false   queries without a hit are skipped
#   :with_query  false   yield only the hit/group (default)
#                true    also yield query(n) as a second block argument
#
# Any other :by value yields nothing.
def each_peptide_hit(opts={})
  settings = { :by => :top, :yield_nil => true, :with_query => false }.merge(opts)
  by = settings[:by]
  yield_nil = settings[:yield_nil]
  with_query = settings[:with_query]

  # hands one item to the caller's block, paired with its query on request
  emit = lambda do |item, num|
    if with_query
      yield item, query(num)
    else
      yield item
    end
  end

  peptides = section('peptides')
  1.upto(nqueries) do |n|
    case by
    when :top
      hit = peptides.peptide_hit(n)
      emit.call(hit, n) if yield_nil || !hit.nil?
    when :groups
      group = peptides.peptide_hits(n)
      group.shift # remove the 0 index
      emit.call(group, n) if yield_nil || !group.first.nil?
    when :all
      group = peptides.peptide_hits(n)
      group.shift # remove the 0 index
      if group.first.nil?
        # a query with no hit is either skipped or reported as nil
        next unless yield_nil
        emit.call(nil, n)
      end
      group.each { |pep_hit| emit.call(pep_hit, n) }
    end
  end
end

#each_query ⇒ Object

Yields each query to the block.



177
178
179
180
181
# File 'lib/ms/mascot/dat/archive.rb', line 177

# Passes every query, numbered 1 through nqueries, to the block in order.
def each_query
  1.upto(nqueries) { |num| yield query(num) }
end

#nqueries ⇒ Object

Returns the number of queries registered in self.



172
173
174
# File 'lib/ms/mascot/dat/archive.rb', line 172

# Counts the section names that match /query/. The count is computed on
# first use and cached in @nqueries.
def nqueries
  @nqueries ||= section_names.grep(/query/).size
end

#query(num) ⇒ Object

Returns the specified query.



184
185
186
187
188
189
190
# File 'lib/ms/mascot/dat/archive.rb', line 184

# Looks up the section named "query<num>" and returns its entry, or nil
# when no such section is indexed.
def query(num)
  index = section_index("query#{num}")
  index ? self[index] : nil
end

#reindex(&block) ⇒ Object

Reindexes self.



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/ms/mascot/dat/archive.rb', line 110

# Reindexes self: clears the cached section names, re-splits the archive
# on boundary via reindex_by_sep (forwarding the block, if any), then
# trims the first and last index entries. Returns self.
def reindex(&block)
  # the cached names are discarded before the entries are indexed again
  @section_names.clear
  reindex_by_sep(boundary, 
    :entry_follows_sep => true, 
    :exclude_sep => true,
    # :blksize => 8388608,  # default in ExternalArchive
    :blksize => 33_554_432,  # four times the 8388608 default noted above
  &block)

  # remove the first and last entries, which contain
  # the metadata and indicate the end of the multipart 
  # form data.
  io_index.shift
  io_index.pop

  self
end

#section(name) ⇒ Object

Returns the entry for the named section.



150
151
152
# File 'lib/ms/mascot/dat/archive.rb', line 150

# Returns the entry for the named section, or nil when no section has
# that name (the same convention query uses for a missing query).
def section(name)
  index = section_index(name)
  # section_index returns nil for an unknown name; do not pass that to []
  index.nil? ? nil : self[index]
end

#section_index(name) ⇒ Object

Returns the index of the named section.



155
156
157
158
159
160
# File 'lib/ms/mascot/dat/archive.rb', line 155

# Scans the entries from index 0 upward and returns the first index whose
# section name equals name, or nil if none does.
def section_index(name)
  (0...length).find { |position| section_name(position) == name }
end

#section_name(index) ⇒ Object

Returns the section name for the entry at index.



163
164
165
166
167
168
169
# File 'lib/ms/mascot/dat/archive.rb', line 163

# Returns the section name for the entry at index, parsing it with
# parse_section_name and caching it in @section_names on first access.
def section_name(index)
  # negative indices only line up once every section is resolved,
  # because until then @section_names may be shorter than self
  resolve_sections if index < 0

  cached = @section_names[index]
  return cached if cached

  @section_names[index] = parse_section_name(index)
end

#section_names(resolve = true) ⇒ Object

The section names corresponding to each entry in self.

Normally section names are lazily parsed from the Content-Type header of an entry as needed. If resolve is true, all section names are parsed and then returned; otherwise section_names may return a partially-filled array.



144
145
146
147
# File 'lib/ms/mascot/dat/archive.rb', line 144

# Returns the array of cached section names. With resolve true (the
# default) resolve_sections is called first; otherwise the array is
# returned as-is and may be only partially filled.
def section_names(resolve=true)
  return @section_names unless resolve

  resolve_sections
  @section_names
end

#str_to_entry(str) ⇒ Object

Converts str into an entry according to the content type header which should be present at the start of the string.



130
131
132
133
134
135
136
# File 'lib/ms/mascot/dat/archive.rb', line 130

# Converts str into an entry. The content type is read from str with
# parse_content_type and mapped to a class by content_type_class; when a
# class is found its parse(str, self) result is returned, otherwise str
# itself is returned unchanged.
def str_to_entry(str)
  entry_class = content_type_class(parse_content_type(str))
  entry_class ? entry_class.parse(str, self) : str
end