Module: Mspire::Mzml::Reader
- Included in:
- Mspire::Mzml
- Defined in:
- lib/mspire/mzml/reader.rb
Instance Attribute Summary collapse
-
#link ⇒ Object
Returns the value of attribute link.
Instance Method Summary collapse
-
#get_default_data_processing_ids(io, index_list, lookback = 200) ⇒ Object
Returns a hash keyed by :spectrum or :chromatogram whose value is the default data processing id (aka ref) of the corresponding list, as a string.
-
#get_header_string(io) ⇒ Object
Scraping off the header string saves ~3 seconds when reading an 83M mzML file (even though we’re just handing in an IO object to Nokogiri::XML::Document.parse and are very careful not to parse too far).
-
#read_header!(list_type_to_default_data_processing_id) ⇒ Object
list_type_to_default_data_processing_id is a hash keyed by :spectrum or :chromatogram that gives the id of the default data processing object for the SpectrumList and/or the ChromatogramList.
- #set_from_xml_io!(xml_io) ⇒ Object
Instance Attribute Details
#link ⇒ Object
Returns the value of attribute link.
28 29 30 |
# File 'lib/mspire/mzml/reader.rb', line 28
#
# Reader for the +link+ attribute.
#
# @return [Object] the current value of @link. read_header! assigns a Hash
#   here and fills it with lookup tables such as :ref_hash and
#   :source_file_hash; it is nil until something assigns it.
def link
  @link
end
Instance Method Details
#get_default_data_processing_ids(io, index_list, lookback = 200) ⇒ Object
Returns a hash keyed by :spectrum or :chromatogram whose value is the default data processing id (aka ref) of the corresponding list, as a string.
43 44 45 46 47 48 49 50 51 52 |
# File 'lib/mspire/mzml/reader.rb', line 43
#
# For every list in +index_list+, looks at the bytes immediately preceding
# the first indexed entry, finds the opening <spectrumList>/<chromatogramList>
# tag in that window and pulls out its defaultDataProcessingRef attribute.
#
# @param io [#bookmark] a seekable io; #bookmark must yield an io that
#   responds to #pos= and #read
# @param index_list [#each_pair] yields (name, index) pairs; index[0] is
#   used as a byte offset into +io+
# @param lookback [Integer] how many bytes before index[0] to scan
# @return [Hash] name => ref string (nil when the attribute is not found
#   inside the scanned window)
def get_default_data_processing_ids(io, index_list, lookback=200)
  hash = {}
  index_list.each_pair do |name, index|
    first_offset = index[0]
    # Clamp to the start of the stream: when the first entry sits closer
    # than +lookback+ bytes to the beginning, a negative position would
    # raise Errno::EINVAL.
    window_start = [first_offset - lookback, 0].max
    io.bookmark do |bookmarked_io|
      bookmarked_io.pos = window_start
      # read exactly up to the first entry so the window never overlaps it
      window = bookmarked_io.read(first_offset - window_start)
      hash[name] = window[/<#{name}List.*defaultDataProcessingRef=['"](.*?)['"]/m, 1]
    end
  end
  hash
end
#get_header_string(io) ⇒ Object
Scraping off the header string saves ~3 seconds when reading an 83M mzML file (even though we’re just handing in an IO object to Nokogiri::XML::Document.parse and are very careful not to parse too far).
58 59 60 61 62 63 64 65 66 67 68 69 |
# File 'lib/mspire/mzml/reader.rb', line 58
#
# Reads +io+ in fixed-size chunks from its current position and stops after
# the first chunk in which an opening tag starting with "<spectrum" or
# "<chromatogram" shows up. Everything read so far is returned, so the
# result normally extends somewhat past the tag itself.
#
# @param io [#read] the stream to read from (the argument is used, not @io)
# @return [String] the accumulated text; the whole remaining stream when no
#   such tag is found, and an empty string when the stream is already at EOF
def get_header_string(io)
  chunk_size = 2**12
  loc = 0 # offset, within +string+, at which the newest chunk begins
  string = ''.dup
  while chunk = io.read(chunk_size)
    string << chunk
    # back up 20 characters so a tag split across two chunks is still seen
    start_looking = [loc - 20, 0].max
    break if string[start_looking..-1] =~ /<(spectrum|chromatogram)/
    loc += chunk_size
  end
  string
end
#read_header!(list_type_to_default_data_processing_id) ⇒ Object
list_type_to_default_data_processing_id is a hash keyed by :spectrum or :chromatogram that gives the id of the default data processing object for the SpectrumList and/or the ChromatogramList. This information is not obtainable from the header string, so it must be obtained beforehand.
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# File 'lib/mspire/mzml/reader.rb', line 75
#
# Rewinds @io, parses the header portion of the document with Nokogiri and
# populates this object's header attributes (cvs, file_description,
# samples, software_list, ...) as well as @link, a hash of id-indexed
# lookup tables that is handed to each successive from_xml call.
#
# @param list_type_to_default_data_processing_id [#each] yields
#   (type, process_id) pairs; each process_id is looked up in
#   @link[:data_processing_hash] and stored in @link under
#   :"<type>_default_data_processing"
# @return [nil] once the 'run' element has been handled
def read_header!(list_type_to_default_data_processing_id)
  @io.rewind
  header_xml = get_header_string(@io)
  document = Nokogiri::XML.parse(header_xml, nil, @encoding, Mspire::Mzml::Parser::NOBLANKS)
  document.remove_namespaces!

  # an indexed file wraps the real mzML element in an outer element
  root_n = document.root
  root_n = root_n.child if root_n.name == 'indexedmzML'
  @id = root_n[:id]

  cv_list_n = root_n.child
  self.cvs = cv_list_n.children.map { |cv_n| Mspire::Mzml::CV.from_xml(cv_n) }

  # remember the file description node; it is built only after the
  # referenceable param groups (if any) have been indexed
  file_description_n = cv_list_n.next
  node = file_description_n.next

  # lookup tables shared with every from_xml call below
  @link = {}
  if node.name == 'referenceableParamGroupList'
    # built without a link hash: the ref_hash does not exist yet
    self.referenceable_param_groups = node.children.map { |rpg_n| Mspire::Mzml::ReferenceableParamGroup.from_xml(rpg_n) }
    @link[:ref_hash] = self.referenceable_param_groups.index_by(&:id)
    node = node.next
  end

  # the ref_hash (when present) is in place, so the file description can be built
  self.file_description = Mspire::Mzml::FileDescription.from_xml(file_description_n, @link)
  @link[:source_file_hash] = self.file_description.source_files.index_by(&:id)

  # walk the remaining siblings in document order until the run is reached
  loop do
    case node.name
    when 'sampleList'
      self.samples = node.children.map { |sample_n| Mspire::Mzml::Sample.from_xml(sample_n, @link) }
      @link[:sample_hash] = self.samples.index_by(&:id)
    when 'softwareList' # required
      self.software_list = node.children.map { |software_n| Mspire::Mzml::Software.from_xml(software_n, @link) }
      @link[:software_hash] = self.software_list.index_by(&:id)
    when 'instrumentConfigurationList'
      self.instrument_configurations = node.children.map { |config_n| Mspire::Mzml::InstrumentConfiguration.from_xml(config_n, @link) }
      @link[:instrument_configuration_hash] = self.instrument_configurations.index_by(&:id)
    when 'dataProcessingList'
      self.data_processing_list = node.children.map { |processing_n| Mspire::Mzml::DataProcessing.from_xml(processing_n, @link) }
      @link[:data_processing_hash] = self.data_processing_list.index_by(&:id)
    when 'run'
      @link[:index_list] = @index_list
      list_type_to_default_data_processing_id.each do |type, process_id|
        @link[:"#{type}_default_data_processing"] = @link[:data_processing_hash][process_id]
      end
      self.run = Mspire::Mzml::Run.from_xml(@io, node, @link)
      break
    end
    node = node.next
  end
end
#set_from_xml_io!(xml_io) ⇒ Object
30 31 32 33 34 35 36 37 38 39 |
# File 'lib/mspire/mzml/reader.rb', line 30
#
# Stores +xml_io+ as @io, reads the encoding from the first line of the
# stream, builds the index list and then reads the header.
#
# @param xml_io [#bookmark] the io to read from; #bookmark(true) must yield
#   an io that responds to #readline
# @return [Object] whatever read_header! returns
# @raise [RuntimeError] when the stream is empty or its first line carries
#   no encoding="..." attribute
def set_from_xml_io!(xml_io)
  @io = xml_io
  encoding_match =
    begin
      @io.bookmark(true) { |io| io.readline.match(/encoding=["'](.*?)["']/) }
    rescue EOFError
      nil
    end
  # A first line without an encoding attribute yields a nil match; report it
  # with the same error as an empty stream instead of failing on nil[1].
  raise RuntimeError, "no encoding present in XML! (Is this even an xml file?)" unless encoding_match
  @encoding = encoding_match[1]
  @index_list = Mspire::Mzml::IndexList.from_io(@io)
  read_header!( get_default_data_processing_ids(@io, @index_list) )
end