Class: Ms::Msrun::Nokogiri::Mzxml

Inherits:
Object
  • Object
show all
Defined in:
lib/ms/msrun/nokogiri/mzxml.rb

Constant Summary collapse

NetworkOrder =
true

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(msrun_object, io, version) ⇒ Mzxml

Returns a new instance of Mzxml.



16
17
18
19
20
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 16

# Stores the three collaborators this parser works with.
#
# msrun_object:: object that #parse_header populates with run-level metadata
# io::           handle the mzXML data is read from
# version::      version value; #parse_header compares it against '2.0'
def initialize(msrun_object, io, version)
  @msrun, @io, @version = msrun_object, io, version
end

Instance Attribute Details

#io ⇒ Object

Returns the value of attribute io.



14
15
16
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 14

# Reader for the @io instance variable, which #initialize sets from its
# +io+ argument and which the parse_* methods read from.
def io
  @io
end

#msrun ⇒ Object

Returns the value of attribute msrun.



14
15
16
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 14

# Reader for the @msrun instance variable, which #initialize sets from its
# +msrun_object+ argument and which #parse_header fills in and returns.
def msrun
  @msrun
end

#version ⇒ Object

Returns the value of attribute version.



14
15
16
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 14

# Reader for the @version instance variable, which #initialize sets from its
# +version+ argument; #parse_header compares it against '2.0'.
def version
  @version
end

Instance Method Details

#new_scan_from_node(node) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 147

# Builds an Ms::Scan from the attributes of a scan node.
#
# node:: anything answering +[]+ with attribute names (e.g. an XML node)
#
# Slots that get filled:
#   0 => num (Integer)             1 => msLevel (Integer)
#   2 => retentionTime (Float)     3 => start m/z (Float)
#   4 => end m/z (Float)           5 => peaksCount (Integer)
#   6 => totIonCurrent (Float)
#
# Slots 2, 3 and 4 are only assigned when the matching attribute is present.
# A 'filterLine' attribute, when present, overrides slots 3 and 4.
def new_scan_from_node(node)
  scan = Ms::Scan.new
  scan[0] = node['num'].to_i
  scan[1] = node['msLevel'].to_i

  # Drops the first two characters and the last one before converting
  # (presumably a "PT...S" duration string -- confirm against the mzXML schema).
  retention = node['retentionTime']
  scan[2] = retention[2...-1].to_f if retention

  start_mz = node['startMz']
  if start_mz
    scan[3] = start_mz.to_f
    scan[4] = node['endMz'].to_f
  end

  scan[5] = node['peaksCount'].to_i
  scan[6] = node['totIonCurrent'].to_f

  # Applied last so the filter line wins over startMz/endMz.
  filter_line = node['filterLine']
  if filter_line
    scan[3], scan[4] = start_end_from_filter_line(filter_line)
  end

  scan
end

#parse_header(byte_length_or_header_string) ⇒ Object

Returns the msrun object after filling it in from the header.



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 23

# Parses the run header and copies run-level metadata onto @msrun.
#
# byte_length_or_header_string::
#   an Integer -- the io is rewound and that much data is read from its
#   start -- or the header text itself, which is parsed as given.
#
# Sets scan_count, start_time, end_time, parent_basename and parent_location
# on @msrun and returns @msrun.
def parse_header(byte_length_or_header_string)
  string =
    if byte_length_or_header_string.is_a? Integer
      @io.rewind
      @io.read(byte_length_or_header_string)
    else
      # The argument already is the header text.
      byte_length_or_header_string
    end
  doc = Nokogiri::XML.parse(string, *Ms::Msrun::Nokogiri::PARSER_ARGS)
  msrun_n = doc.root
  # For versions >= '2.0' the run node is taken to be the first child of the
  # document root.  NOTE(review): this is a String comparison, so it orders
  # lexicographically -- confirm that is acceptable for all version values.
  if @version >= '2.0'
    msrun_n = msrun_n.child
  end
  @msrun.scan_count = msrun_n['scanCount'].to_i
  # [2...-1] drops the first two characters and the last one before converting
  # (presumably a "PT...S" duration string -- confirm against the mzXML schema).
  @msrun.start_time = msrun_n['startTime'][2...-1].to_f
  @msrun.end_time = msrun_n['endTime'][2...-1].to_f

  filename = msrun_n.search("parentFile").first['fileName']
  (bn, dn) = Ms::Mzxml.parent_basename_and_dir(filename)
  @msrun.parent_basename = bn
  @msrun.parent_location = dn
  @msrun
end

#parse_ms_level(start_byte, length) ⇒ Object

Returns the ms_level as an Integer, or nil if it cannot be found.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 48

# Scans forward from +start_byte+ for an msLevel="N" attribute.
#
# Lines are read one at a time; the search stops at the first line holding
# the attribute, or once the accumulated size of the lines read so far
# exceeds +length+.  The io position is put back where it was on entry.
#
# Returns the level as an Integer, or nil when it was not found.
def parse_ms_level(start_byte, length)
  saved_pos = @io.pos
  @io.pos = start_byte

  level = nil
  consumed = 0
  @io.each_line("\n") do |text|
    found = /msLevel="(\d+)"/o.match(text)
    if found
      level = found[1].to_i
      break
    end
    consumed += text.size
    break if consumed > length
  end

  @io.pos = saved_pos
  level
end

#parse_scan(start_byte, length, options = {}) ⇒ Object

Positions the io at start_byte, which is assumed to be the beginning of the scan element. Returns an Ms::Scan object. Options:

:spectrum => true | false (default is true)
:precursor => true | false (default is true)

Note that even if both :spectrum and :precursor are set to false, the basic information in the scan node (such as ms_level) is still parsed.



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 73

# Reads the scan element starting at +start_byte+ and builds an Ms::Scan.
#
# start_byte:: io offset at which the scan element begins
# length::     size of the scan element; compared against String#size of the
#              text read so far
# options::
#   :spectrum  => true | false (default true)  attach the peak data (slot 8)
#   :precursor => true | false (default true)  attach an Ms::Precursor
#
# With :spectrum => false the io is read line by line and reading stops at
# the first "<peaks" tag, so the peak data is never loaded.
#
# The io position is put back to where it was on entry once the scan text
# has been read.  Returns the Ms::Scan.
def parse_scan(start_byte, length, options={})
  opts = {:spectrum => true, :precursor => true}.merge(options)
  start_io_pos = @io.pos
  @io.pos = start_byte
  string = ""
  if opts[:spectrum]
    string = @io.read(length)
  else
    # don't bother reading all the peak information if we aren't wanting it
    # and can avoid it!  This is important for high res instruments
    # especially since the peak data is huge.
    @io.each do |line|
      if md = %r{<peaks}.match(line)
        # just add the part of the line before the <peaks tag
        string << line[0, md.begin(0)]
        break
      end
      string << line
      if string.size >= length
        # ran past the scan without meeting <peaks: keep only the first
        # +length+ characters (drop everything after them)
        string.slice!(length..-1)
        break
      end
    end
  end
  # leave the io where the caller had it, as parse_ms_level does
  @io.pos = start_io_pos

  doc = Nokogiri::XML.parse(string, *Ms::Msrun::Nokogiri::PARSER_ARGS)
  scan_n = doc.root
  scan = new_scan_from_node( scan_n )
  # first child of the scan node; nil when the scan node has no children
  # (possible when the text was cut off before <peaks)
  prec_n = scan_n.child

  peaks_n =
    if prec_n && prec_n.name == 'precursorMz'
      if opts[:precursor]
        # precursor slots: 0 => m/z (node text), 1 => intensity,
        # 3 => [charge] when a precursorCharge attribute is present
        prec = Ms::Precursor.new
        prec[1] = prec_n['precursorIntensity'].to_f
        prec[0] = prec_n.text.to_f
        if x = prec_n['precursorCharge']
          prec[3] = [x.to_i]
        end
        scan.precursor = prec
      end
      prec_n.next_sibling
    else
      prec_n # this is a peaks node
    end

  # is this for mzData?
  #if x = node['precursorScanNum']
  #  prec[2] = scans_by_num[x.to_i]
  #end

  if opts[:spectrum]
    # all mzXML (at least versions 1--3.0) *must* be 'network' byte order!
    # data is stored as the base64 string until we actually try to access
    # it!  At that point the string is decoded and knows it is interleaved
    # data.  So, no spectrum is actually decoded unless it is accessed!
    compression_type = peaks_n['compressionType']
    unpack_code = Ms::Data::LazyIO.unpack_code(peaks_n['precision'].to_i, NetworkOrder)
    lazy_string = Ms::Data::LazyString.new(peaks_n.text, unpack_code, compression_type == 'zlib')
    peaks_data = Ms::Data.new_interleaved(lazy_string)
    scan[8] = Ms::Spectrum.new(peaks_data)
  end
  scan
end

#start_end_from_filter_line(line) ⇒ Object



142
143
144
145
# File 'lib/ms/msrun/nokogiri/mzxml.rb', line 142

# Pulls the bracketed "[low-high]" range out of a filter line and returns it
# as a two-element Array of Floats, e.g.
#   "ITMS + c NSI d Full ms3 ... [160.00-1275.00]"  =>  [160.0, 1275.0]
def start_end_from_filter_line(line)
  bounds = /\[([^-]+)-([^-]+)\]/.match(line)
  [bounds[1].to_f, bounds[2].to_f]
end