Class: Mspire::Mascot::Dat

Inherits:
Object
  • Object
show all
Defined in:
lib/mspire/mascot/dat.rb,
lib/mspire/mascot/dat/cast.rb,
lib/mspire/mascot/dat/index.rb,
lib/mspire/mascot/dat/query.rb,
lib/mspire/mascot/dat/header.rb,
lib/mspire/mascot/dat/masses.rb,
lib/mspire/mascot/dat/peptide.rb,
lib/mspire/mascot/dat/protein.rb,
lib/mspire/mascot/dat/parameters.rb,
lib/mspire/mascot/dat/section/key_val.rb

Defined Under Namespace

Modules: Cast, Castable, Section Classes: Header, Index, Masses, Parameters, Peptide, Protein, Query

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(io, index_file = false) ⇒ Dat

if index_file is true, will attempt to use a written index file based on naming conventions; if one doesn’t yet exist it will create one for the next usage. If handed a String, will consider it the index filename for reading or writing depending on whether it exists.



69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/mspire/mascot/dat.rb', line 69

# Builds a Dat around an already-open io and prepares its section index.
#
# index_file selects where the index comes from:
#   * a String - used as the index filename
#   * true     - the filename is derived by Dat::Index.index_filename(io.path)
#   * anything else - no index file is used; the index is built from io
#
# When an index filename is in play and that file exists, the index is
# loaded from it; otherwise the index is built from io and then written to
# that filename.
def initialize(io, index_file=false)
  @io = io
  index_filename =
    if index_file.is_a?(String)
      index_file
    elsif index_file.is_a?(TrueClass)
      Dat::Index.index_filename(io.path)
    end
  @index = Index.new
  if !index_filename
    # no index file requested: scan the io only
    @index.from_io!(@io)
  elsif File.exist?(index_filename)
    @index.from_byteindex!(index_filename)
  else
    # first use of this index file: build from the io, then persist it
    @index.from_io!(@io)
    @index.write(index_filename)
  end
end

Instance Attribute Details

#indexObject

the index object which points to the start byte for each section



63
64
65
# File 'lib/mspire/mascot/dat.rb', line 63

# The index object, which points to the start byte of each section.
def index; @index; end

#ioObject

the io object which is the open dat file



60
61
62
# File 'lib/mspire/mascot/dat.rb', line 60

# The io object, which is the open dat file.
def io; @io; end

Class Method Details

.each_key_val(io, &block) ⇒ Object

returns the key and value for KEY=VAL sections



29
30
31
32
33
34
35
36
# File 'lib/mspire/mascot/dat.rb', line 29

# Walks the lines of a KEY=VAL section (via each_line) and hands the block a
# two-element array [key, value] per line.
#
# Each line is chomped and split on the first '=' only, so a value may itself
# contain '='. An empty value ("KEY=") is reported as nil.
#
# Returns an Enumerator when called without a block.
def each_key_val(io, &block)
  return to_enum(__method__, io) unless block
  each_line(io) do |raw|
    raw.chomp!
    name, value = raw.split('=', 2)
    value = nil if value == ''
    block.call([name, value])
  end
end

.each_line(io, &block) ⇒ Object

reads each line from a section until reaching the end of the section



20
21
22
23
24
25
26
# File 'lib/mspire/mascot/dat.rb', line 20

# Passes successive lines read from io to the block, stopping as soon as a
# line beginning with '--' is read (that line is consumed from io but is not
# passed to the block).
#
# Returns an Enumerator when called without a block.
def each_line(io, &block)
  return to_enum(__method__, io) unless block
  io.each_line do |text|
    break if text.start_with?('--')
    block.call(text)
  end
end

.open(file, index_file = false, &block) ⇒ Object



49
50
51
52
53
54
# File 'lib/mspire/mascot/dat.rb', line 49

# Opens file for reading, wraps the handle in a new instance (passing
# index_file through to the constructor) and calls the block with that
# instance.
#
# The file is opened with the block form of File.open, so the handle is
# closed when the block finishes, including when the constructor or the
# block raises. Previously an exception left the file open.
#
# Returns the value returned by the block.
def open(file, index_file=false, &block)
  File.open(file) do |io|
    block.call(new(io, index_file))
  end
end

.string(io, &block) ⇒ Object



38
39
40
# File 'lib/mspire/mascot/dat.rb', line 38

# Collects the lines produced by each_line(io) and returns them joined into
# a single String. The block parameter is accepted but not used.
def string(io, &block)
  section_lines = each_line(io).to_a
  section_lines.join
end

.strip_quotes(string) ⇒ Object

returns the string after stripping off leading and trailing double quotation marks



44
45
46
# File 'lib/mspire/mascot/dat.rb', line 44

# Returns a copy of string with a leading double quote and a trailing double
# quote removed. The trailing anchor is \Z, so a quote that sits just before
# a final newline is also removed. The argument is not modified.
def strip_quotes(string)
  edge_quote = /\A"|"\Z/
  string.gsub(edge_quote, '')
end

Instance Method Details

#each_decoy_peptide(top_n = Float::INFINITY, &block) ⇒ Object



207
208
209
# File 'lib/mspire/mascot/dat.rb', line 207

# Convenience wrapper: delegates to each_peptide with non_decoy set to false,
# forwarding top_n and the block.
def each_decoy_peptide(top_n=Float::INFINITY, &block)
  non_decoy = false
  each_peptide(non_decoy, top_n, &block)
end

#each_peptide(*args, &block) ⇒ Object

optional parameters, passed in any order:

top_n: [Float::INFINITY] a Numeric (top N hits)
non_decoy: [true] a Boolean

Returns the top_n hits. If non_decoy is false or nil, returns the decoy hits.

each_peptide(false, 1) # top decoy peptide hit
each_peptide(2, true)  # top 2 peptide hits per query
each_peptide(1)        # top peptide hit per query


194
195
196
197
198
199
200
201
202
203
204
205
# File 'lib/mspire/mascot/dat.rb', line 194

# Iterates over peptide hits, calling the block with each peptide whose
# peptide_num is <= top_n.
#
# Optional arguments may be given in any order:
#   * a Numeric - top_n (defaults to Float::INFINITY)
#   * any non-Numeric - non_decoy flag (defaults to true); a false or nil
#     value selects the :decoy_peptides section instead of :peptides
#
# Returns an Enumerator when called without a block.
def each_peptide(*args, &block)
  return to_enum(__method__, *args) unless block
  limits, flags = args.partition { |arg| arg.is_a?(Numeric) }
  top_n = limits.first || Float::INFINITY
  non_decoy = flags.empty? ? true : flags.first
  section_name = non_decoy ? :peptides : :decoy_peptides
  start_section!(section_name)
  Mspire::Mascot::Dat::Peptide.each(@io) do |peptide|
    # keep scanning past filtered hits; later peptides may still qualify
    next unless peptide.peptide_num <= top_n
    block.call(peptide)
  end
end

#each_protein(&block) ⇒ Object



151
152
153
154
155
156
157
158
# File 'lib/mspire/mascot/dat.rb', line 151

# Positions the io at the :proteins section and calls the block with one
# Dat::Protein per KEY=VAL entry.
#
# Each value is split on its first comma: the left part is converted with
# to_f, the right part has surrounding double quotes stripped. The key also
# has its surrounding double quotes stripped.
#
# Returns an Enumerator when called without a block.
def each_protein(&block)
  return to_enum(__method__) unless block
  start_section!(:proteins)
  Dat.each_key_val(@io) do |quoted_id, payload|
    mass_text, quoted_desc = payload.split(',', 2)
    protein = Dat::Protein.new(
      Dat.strip_quotes(quoted_id),
      mass_text.to_f,
      Dat.strip_quotes(quoted_desc)
    )
    block.call(protein)
  end
end

#each_query(&block) ⇒ Object



160
161
162
163
164
165
166
167
# File 'lib/mspire/mascot/dat.rb', line 160

# Visits every query number known to the index, in the order given by
# @index.query_nums. For each one the io is moved to the byte recorded in
# @index.query_num_to_byte and a Query read from that position is passed to
# the block.
#
# Returns an Enumerator when called without a block.
def each_query(&block)
  return to_enum(__method__) unless block
  @index.query_nums.each do |num|
    @io.pos = @index.query_num_to_byte[num]
    query = Mspire::Mascot::Dat::Query.new
    block.call(query.from_io!(@io))
  end
end

#query(n) ⇒ Object

returns query number n (these are NOT zero indexed)



178
179
180
181
# File 'lib/mspire/mascot/dat.rb', line 178

# Returns query number n (query numbers are NOT zero indexed): positions the
# io via start_section!(n), then reads a new Query from it.
def query(n)
  start_section!(n)
  record = Query.new
  record.from_io!(@io)
end

#section(*args) ⇒ Object Also known as: []

The universal way to access information. Returns the section with the appropriate cast (if available), or otherwise as a String object containing the section text; returns nil if the section doesn’t exist. Also responds to :queries by calling #each_query. An Enumerator is returned for enumerable sections.

dat.section(:header)  # => a Dat::Header object (hash-like)
dat.section(:peptides)  # => an Enumerator for peptides
dat.section(:peptides, 1)  # => an Enumerator for top peptides
dat[:peptides, 1].each {|peptide| ... <top peptide> }
# the equivalent each_<whatever> method:
dat.each_peptide(1) {|peptide| ... <top peptide> }

# aliased with #[] for bracket access:
dat[:header]
dat[:peptides, 1]
...


108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/mspire/mascot/dat.rb', line 108

# Looks up a section by name (args.first, converted with to_sym) and returns
# it in the most specific form available. Any remaining args are forwarded
# to the iterator method when one is used.
#
# Dispatch order:
#   1. an each_<singular> method on self (e.g. :peptides -> each_peptide),
#      called WITHOUT a block, so the result is whatever that method returns
#      in that case
#   2. a constant under Mspire::Mascot::Dat named after the capitalized
#      section name; it is instantiated and filled with from_io!
#      (nil if the instance does not respond to from_io!)
#   3. the raw section text, if @index.byte_num has the name as a key
#   4. nil
def section(*args)
  # If the name exists as a class, then try to call the from_io! method
  # on a new instance of the class (e.g., Parameters.new.from_io!(io)).
  # If the name is a plural, try the each_<singular> method on self
  # (e.g., each_peptide).
  name = args.first.to_sym
  capitalized = name.to_s.capitalize
  maybe_singular = 
    case capitalized
    when 'Queries'
      # irregular plural; each_query does its own positioning per query
      'query'
    else
      # Position the io at the section for every other name. This happens
      # before any existence check below.
      # NOTE(review): behavior for an unknown name depends on what
      # @index[name] returns, which is not visible here -- confirm.
      start_section!(name)
      # naive singularization: drop the last character ("Peptides" -> "Peptide")
      capitalized[0...-1]
    end
  maybe_iterator = "each_#{maybe_singular.downcase}".to_sym
  if self.respond_to?(maybe_iterator)
    # no block is passed, only the extra positional args
    self.send(maybe_iterator, *args[1..-1])
  elsif Mspire::Mascot::Dat.const_defined?(capitalized)
    klass = Mspire::Mascot::Dat.const_get(capitalized)
    obj = klass.new
    if obj.respond_to?(:from_io!)
      case name
      when :parameters, :masses
        # these two get an extra `false` second argument
        # NOTE(review): its meaning is defined by those classes' from_io!,
        # which is not visible here
        obj.send(:from_io!, @io, false)
      else
        obj.send(:from_io!, @io)
      end
    else
      nil
    end
    #elsif Mspire::Mascot::Dat.const_defined?(maybe_singular)
    #  klass = Mspire::Mascot::Dat.const_get(maybe_singular)
    #  klass.send(:each, @io, &block)
  elsif @index.byte_num.key?(name)
    # no cast available: return the section's lines joined as one String
    Mspire::Mascot::Dat.string(@io)
  else
    nil
  end
end

#sectionsObject Also known as: keys

Returns a list of all sections as symbols. The symbol :queries is returned, rather than each query individually, if there are one or more queries.



214
215
216
217
218
219
220
# File 'lib/mspire/mascot/dat.rb', line 214

# Lists every section name as a Symbol: the keys of @index.byte_num, plus
# :queries (appended last) when the index reports that it has queries.
def sections
  names = @index.byte_num.keys
  names << 'queries' if @index.has_queries?
  names.map(&:to_sym)
end

#start_section!(name) ⇒ Object

positions io at the beginning of the section data (past the Content type and blank line). If given an integer, interprets it as a query number. returns self



172
173
174
175
# File 'lib/mspire/mascot/dat.rb', line 172

# Moves the io to the position the index gives for name (@index[name]) and
# returns self.
def start_section!(name)
  offset = @index[name]
  @io.pos = offset
  self
end