libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzxmloutput.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/output/mzxmloutput.cpp
3 * \date 23/11/2019
4 * \author Olivier Langella
5 * \brief write msrun peaks into mzxml output stream
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2019 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31#include "mzxmloutput.h"
32#include <QDebug>
33#include <QStringList>
34#include <algorithm>
35#include <cstdio>
36#include "../../config.h"
37
38using namespace pappso;
39
40
/**
 * \brief return a copy of \p in whose bytes are in reverse order
 *
 * Used when serialising peak values so that a little-endian host can emit
 * them in the opposite (big-endian / "network") byte order.
 *
 * The value is reinterpreted as raw bytes, so this is only meaningful for
 * plain scalar types (integers, float, double). For a one-byte type the
 * value is returned unchanged.
 *
 * NOTE(review): the result of swapping a floating point value is generally
 * not a meaningful number; callers should only copy its bytes out (as the
 * peak writer in this file does) and never do arithmetic on it.
 *
 * \param in value to byte-swap (taken by value, the caller's object is
 * untouched)
 * \return the byte-swapped value
 */
template <class T>
T
change_endian(T in)
{
  // Accessing an object through char* is allowed by the aliasing rules.
  char *const first_byte = reinterpret_cast<char *>(&in);
  std::reverse(first_byte, first_byte + sizeof(T));
  return in;
}
50
51
// Translater constructor body: remembers the MzxmlOutput that received
// spectra will be forwarded to (see setQualifiedMassSpectrum below).
// NOTE(review): the declarator line is not visible in this listing; the
// declaration index shows it as Translater(MzxmlOutput *p_mzxml_output).
// The pointer is stored as-is, no null check is made here.
53{
54 mp_output = p_mzxml_output;
55}
// Spectrum handler callback: for every spectrum handed over by the reader,
// calls count() on the output's monitor (presumably one progress tick per
// spectrum - confirm against UiMonitorInterface) and then lets the owning
// MzxmlOutput write the spectrum as a <scan> element.
// The two bare qDebug() calls only act as entry/exit trace markers.
// NOTE(review): the line carrying the qualified function name (60) is missing
// from this listing.
59void
61 const QualifiedMassSpectrum &spectrum)
62{
63 qDebug();
64 mp_output->m_monitor.count();
65 mp_output->writeQualifiedMassSpectrum(spectrum);
66 qDebug();
67}
// Tells the reader whether peak lists (the binary data) are needed for each
// spectrum. This handler unconditionally answers yes.
// NOTE(review): the line carrying the function name (69) is missing from this
// listing; the declaration index shows needPeakList() const override.
68bool
70{
71 return true;
72}
73
74
// MzxmlOutput constructor: binds the progress monitor reference, creates the
// XML stream writer on the given output device with auto-formatting switched
// on, and immediately writes the XML declaration (version "1.0").
// NOTE(review): the first declarator line (75) is missing from this listing;
// the declaration index shows
// MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device).
// NOTE(review): mpa_outputStream is allocated with a raw new; the matching
// delete is not visible here (lines 86-90 are missing) - confirm the
// destructor releases it.
76 QIODevice *p_output_device)
77 : m_monitor(monitor)
78{
79
80 mpa_outputStream = new QXmlStreamWriter(p_output_device);
81 mpa_outputStream->setAutoFormatting(true);
82
83 mpa_outputStream->writeStartDocument("1.0");
84}
85
91
// Stores the read-ahead flag. It is not acted on here: the write() functions
// pass it on to their Translater through setReadAhead() before reading.
// NOTE(review): the line carrying the function name (93) is missing from this
// listing.
92void
94{
95 m_isReadAhead = isReadAhead;
96}
// Converts a whole MS run: sets the monitor's total step count to the number
// of spectra, writes the mzXML header, then asks the reader to push every
// spectrum through a Translater bound to this object (each spectrum becomes
// one <scan> element).
// NOTE(review): this listing lacks the function-name line (98) and two body
// lines (108 and 113). Judging from the sibling overload below they may be a
// setNeedMsLevelPeakList() call and a final monitor reset, but that cannot be
// confirmed from here.
97void
99{
100 qDebug();
101 m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
102 writeHeader(p_msrunreader);
103
104 Translater translater(this);
105
106 translater.setReadAhead(m_isReadAhead);
107
109 // translater.setNeedMsLevelPeakList(1, false);
110 // translater.setNeedMsLevelPeakList(2, false);
111 p_msrunreader->readSpectrumCollection(translater);
112
114 qDebug();
115}
116
// Same conversion as the single-argument write(), but the spectra are read
// through readSpectrumCollection2() with the caller's read configuration.
// MS level 1 peak lists are requested only when MS1 masking is off
// (m_ms1IsMasked == false). After reading, the monitor's total step count is
// set back to 0.
// NOTE(review): the line with the function name and first parameter (118) is
// missing from this listing; the body uses it as `read_config`.
117void
119 pappso::MsRunReader *p_msrunreader)
120{
121 qDebug();
122 m_monitor.setTotalSteps(p_msrunreader->spectrumListSize());
123 writeHeader(p_msrunreader);
124
125 Translater translater(this);
126
127 translater.setReadAhead(m_isReadAhead);
128
129 translater.setNeedMsLevelPeakList(1, !m_ms1IsMasked);
130 // translater.setNeedMsLevelPeakList(1, false);
131 // translater.setNeedMsLevelPeakList(2, false);
132 p_msrunreader->readSpectrumCollection2(read_config, translater);
133
134 m_monitor.setTotalSteps(0);
135}
136
// Writes everything that precedes the scans in an mzXML 3.2 document:
// the <mzXML> root with its namespace attributes, the opening <msRun> tag
// (scanCount = number of spectra in the reader), one <parentFile>, a
// placeholder <msInstrument> and a <dataProcessing> section naming this
// library as conversion software.
// On return <mzXML> and <msRun> are still open, so elements written
// afterwards become children of <msRun>.
// NOTE(review): the line carrying the function name (138) is missing from
// this listing; the declaration index shows
// void writeHeader(MsRunReader *p_msrunreader).
137void
139{
140
// Declared before any start element, so the xsi prefix applies to the next
// element written (the <mzXML> root) - per QXmlStreamWriter::writeNamespace.
141 mpa_outputStream->writeNamespace("http://www.w3.org/2001/XMLSchema-instance",
142 "xsi");
143 // xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0"
144 // xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.0
145 // http://sashimi.sourceforge.net/schema_revision/mzXML_2.0/mzXML_idx_2.0.xsd"
146 /*
147114 writer.setPrefix("xsi", xmlnsxsi);
148115 writer.setDefaultNamespace(namespaceURI);
149mpa_outputStream->writeStartElement("mzXML");
150117 writer.writeNamespace("xsi", xmlnsxsi);
151118 writer.writeDefaultNamespace(namespaceURI);
152119
153120 writer.writeAttribute(xmlnsxsi, "schemaLocation",
154xsischemaLocation); 121 */
// The default namespace and the schema location are emitted as plain
// attributes rather than through the writer's namespace API.
155 mpa_outputStream->writeStartElement("mzXML");
156 mpa_outputStream->writeAttribute(
157 "xmlns", "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2");
158 mpa_outputStream->writeAttribute(
159 "xsi:schemaLocation",
160 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 "
161 "http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/"
162 "mzXML_idx_3.2.xsd");
163
// <msRun> is opened here and deliberately not closed in this function.
164 mpa_outputStream->writeStartElement("msRun");
165 mpa_outputStream->writeAttribute(
166 "scanCount", QString("%1").arg(p_msrunreader->spectrumListSize()));
167 //<msRun scanCount="16576" startTime="PT0.292553S" endTime="PT3000.34S">
168 // writer.writeAttribute("scanCount",
169 // ms_run.getSpectrumCount(this.controller).toString());
170
171 /*
172 * # < parentFile fileName = #
173 * "file://SEQUEST1/raw/vidal/20060411_VIDAL_JEAN_1_PEPCR1_42140.RAW" #
174 * fileType = "RAWData" fileSha1 = #
175 * "23c1620d4ad3f4f0103b0141b7caec1e8b7eebf5" / >
176 */
// Only fileName and fileType are written; no fileSha1 attribute is emitted.
177 mpa_outputStream->writeStartElement("parentFile");
178 mpa_outputStream->writeAttribute("fileName",
179 p_msrunreader->getMsRunId()->getFileName());
180 mpa_outputStream->writeAttribute("fileType", "RAWData");
181 mpa_outputStream->writeEndElement();
182 /*
183144
184145 MsInstrumentList instrument_list =
185ms_run.getMsInstruments(controller); 146 for (MsInstrument
186instrument : instrument_list) { 147 this.write(instrument); 148 }
187*/
188
// Instrument description is a fixed placeholder: id "1", manufacturer
// "unknown"; model/ionisation/analyzer/detector are not written.
189 mpa_outputStream->writeStartElement("msInstrument");
190 mpa_outputStream->writeAttribute("msInstrumentID", "1");
191 //<msManufacturer category="msManufacturer" value="Thermo Scientific"/>
192 mpa_outputStream->writeStartElement("msManufacturer");
193 mpa_outputStream->writeAttribute("category", "msManufacturer");
194 mpa_outputStream->writeAttribute("value", "unknown");
195 mpa_outputStream->writeEndElement();
196 //<msModel category="msModel" value="Q Exactive"/>
197 // <msIonisation category="msIonisation" value="nanoelectrospray"/>
198 // <msMassAnalyzer category="msMassAnalyzer" value="quadrupole"/>
199 // <msDetector category="msDetector" value="inductive detector"/>
200 // <software type="acquisition" name="Xcalibur"
201 // version="2.1-152001/2.1.0.1520"/>
202 mpa_outputStream->writeEndElement();
203 /*
204149
205150 // #< dataProcessing centroided ="1" >
206151 // my $ref_data_processings =
207$ms_run_description->dataProcessing(); 152 MsDataProcessingList
208dataProcList = ms_run.getMsDataProcessings(controller); 153 for
209(MsDataProcessing msDataProc : dataProcList) { 154 this.write(msDataProc); 155 }
210*/
// centroided is hard-coded to "1" regardless of the input data.
211 mpa_outputStream->writeStartElement("dataProcessing");
212 //<dataProcessing centroided="1">
213 mpa_outputStream->writeAttribute("centroided", "1");
214 // <software type="conversion" name="ProteoWizard" version="3.0.3706"/>
215 mpa_outputStream->writeStartElement("software");
216 mpa_outputStream->writeAttribute("type", "conversion");
217 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
218 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
219 mpa_outputStream->writeEndElement();
220 //<processingOperation name="Conversion to mzML"/>
// NOTE(review): <processingOperation> is not closed before the next
// <software> and <comment> are started, so both end up nested inside it.
// Confirm against the mzXML 3.2 schema that this nesting is intended.
221 mpa_outputStream->writeStartElement("processingOperation");
222 mpa_outputStream->writeAttribute("name", "Conversion to mzXML");
223 //<software type="processing" name="ProteoWizard" version="3.0.3706"/>
224 mpa_outputStream->writeStartElement("software");
225 mpa_outputStream->writeAttribute("type", "processing");
226 mpa_outputStream->writeAttribute("name", PAPPSOMSPP_NAME);
227 mpa_outputStream->writeAttribute("version", PAPPSOMSPP_VERSION);
228 mpa_outputStream->writeEndElement();
229 //<comment>Thermo/Xcalibur peak picking</comment>
230 mpa_outputStream->writeStartElement("comment");
231 mpa_outputStream->writeCharacters("pappso::MzxmlOutput");
232 mpa_outputStream->writeEndElement();
233 //</dataProcessing>
// The first writeEndElement() below closes <processingOperation>, the second
// one closes <dataProcessing>.
234 mpa_outputStream->writeEndElement();
235 mpa_outputStream->writeEndElement();
236 // Peaks
237}
238
239
// Finishes the XML document. Per the QXmlStreamWriter documentation,
// writeEndDocument() closes all start elements that are still open, which
// here would be the <msRun> and <mzXML> elements left open by the header.
// NOTE(review): the line carrying the function name (241) is missing from
// this listing and the name is not in the declaration index - presumably
// MzxmlOutput::close(); confirm.
240void
242{
243 mpa_outputStream->writeEndDocument();
244}
245
246
247std::size_t
248MzxmlOutput::getScanNumberFromNativeId(const QString &native_id) const
249{
250 QStringList native_id_list = native_id.split("=");
251 if(native_id_list.size() < 2)
252 {
253 }
254 else
255 {
256 return native_id_list.back().toULong();
257 }
258 return std::numeric_limits<std::size_t>::max();
259}
260
// Returns the scan number to write for a spectrum. When the initial value is
// the "not found" sentinel (std::numeric_limits<std::size_t>::max()), the
// 1-based spectrum index is used instead.
// NOTE(review): the function-name line (262) and the initialiser of
// scan_number (265) are missing from this listing. The initialiser is
// presumably getScanNumberFromNativeId() applied to the spectrum's native
// id - confirm against the original file.
261std::size_t
263{
264 std::size_t scan_number =
266 if(scan_number == std::numeric_limits<std::size_t>::max())
267 {
268 scan_number = spectrum.getMassSpectrumId().getSpectrumIndex() + 1;
269 }
270 return scan_number;
271}
272
// Returns the scan number of a spectrum's precursor. When the initial value
// is the "not found" sentinel (std::numeric_limits<std::size_t>::max()), the
// 1-based precursor spectrum index is used instead.
// NOTE(review): the function-name line (274) and the initialiser of
// scan_number (278) are missing from this listing. The initialiser is
// presumably getScanNumberFromNativeId() applied to the precursor native
// id - confirm against the original file.
// NOTE(review): if getPrecursorSpectrumIndex() can itself return a "not set"
// maximum value, the + 1 below would wrap around to 0 - confirm.
273std::size_t
275{
276
277 std::size_t scan_number =
279 if(scan_number == std::numeric_limits<std::size_t>::max())
280 {
281 scan_number = spectrum.getPrecursorSpectrumIndex() + 1;
282 }
283 return scan_number;
284}
285
// Writes one spectrum as a complete mzXML <scan> element:
//  - scan attributes (num, centroided, msLevel, peaksCount, optional
//    lowMz/highMz, polarity, retentionTime),
//  - for MS level > 1, a <precursorMz> child,
//  - a <peaks> child holding the (m/z, intensity) pairs as base64 encoded
//    64-bit values in big-endian ("network") byte order.
// The two bare qDebug() calls only act as entry/exit trace markers.
// NOTE(review): the line carrying the function name (287) is missing from
// this listing; the declaration index shows
// void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum).
286void
288 const pappso::QualifiedMassSpectrum &spectrum)
289{
290 qDebug();
291 mpa_outputStream->writeStartElement("scan");
292 /*
293 <scan num="1"
294 scanType="Full"
295 centroided="1"
296 msLevel="1"
297 peaksCount="1552"
298 polarity="+"
299 retentionTime="PT0.292553S"
300 lowMz="400.153411865234"
301 highMz="1013.123352050781"
302 basePeakMz="445.12003"
303 basePeakIntensity="2.0422125e06"
304 totIonCurrent="1.737798e07">*/
305 mpa_outputStream->writeAttribute("num",
306 QString("%1").arg(getScanNumber(spectrum)));
// centroided is hard-coded to "1" whatever the input spectrum is.
307 mpa_outputStream->writeAttribute("centroided", QString("1"));
308 mpa_outputStream->writeAttribute("msLevel",
309 QString("%1").arg(spectrum.getMsLevel()));
// A spectrum without peak data (null pointer) is written with peaksCount 0
// and without lowMz/highMz.
310 if(spectrum.getMassSpectrumCstSPtr().get() == nullptr)
311 {
312 mpa_outputStream->writeAttribute("peaksCount", "0");
313 }
314 else
315 {
316 mpa_outputStream->writeAttribute("peaksCount",
317 QString("%1").arg(spectrum.size()));
318
// lowMz/highMz are taken from the first and last data point, which assumes
// the peaks are sorted by increasing m/z - TODO confirm.
319 if(spectrum.size() > 0)
320 {
321 mpa_outputStream->writeAttribute(
322 "lowMz",
323 QString::number(
324 spectrum.getMassSpectrumCstSPtr().get()->front().x, 'f', 12));
325
326 mpa_outputStream->writeAttribute(
327 "highMz",
328 QString::number(
329 spectrum.getMassSpectrumCstSPtr().get()->back().x, 'f', 12));
330 // mpa_outputStream->writeAttribute("highMz",
331 // QString::number(spectrum.getMassSpectrumCstSPtr().get()->back().x,
332 // 'f', 10)); basePeakMz="245.1271988"
333 // basePeakIntensity="5810.7739"
334 // totIonCurrent="57803.815999999999">
335 }
336 }
// Polarity is always written as positive; it is not read from the spectrum.
337 mpa_outputStream->writeAttribute("polarity", "+");
// Retention time is formatted as "PT<seconds>S" with two decimals.
338 mpa_outputStream->writeAttribute(
339 "retentionTime",
340 QString("PT%1S").arg(QString::number(spectrum.getRtInSeconds(), 'f', 2)));
341
342 if(spectrum.getMsLevel() > 1)
343 {
344
345 //<precursorMz precursorScanNum="16574"
346 // precursorIntensity="58403.04296875" precursorCharge="2"
347 ////activationMethod="HCD">994.690619901808</precursorMz>
348 mpa_outputStream->writeStartElement("precursorMz");
349 mpa_outputStream->writeAttribute(
350 "precursorScanNum",
351 QString("%1").arg(getPrecursorScanNumber(spectrum)));
// Each precursor value is written only when its getter reports success
// through `ok`; `ok` is not initialised here, so this relies on the getters
// always setting it - TODO confirm.
352 bool ok;
353 double precursor_intensity = spectrum.getPrecursorIntensity(&ok);
354 if(ok)
355 {
356 mpa_outputStream->writeAttribute(
357 "precursorIntensity", QString::number(precursor_intensity, 'f', 4));
358 }
359 uint charge = spectrum.getPrecursorCharge(&ok);
360 if(ok)
361 {
362 mpa_outputStream->writeAttribute("precursorCharge",
363 QString("%1").arg(charge));
364 }
// The precursor m/z is the element's text content; if it is unavailable the
// <precursorMz> element is written empty.
365 double precursor_mz = spectrum.getPrecursorMz(&ok);
366 if(ok)
367 {
368 mpa_outputStream->writeCharacters(
369 QString::number(precursor_mz, 'f', 12));
370 }
371 mpa_outputStream->writeEndElement();
372 }
373
374 /*<peaks compressionType="none"
375 compressedLen="0"
376 precision="64"
377 byteOrder="network"
378 contentType="m/z-int"></peaks>*/
379
// <peaks> is always written, even for an empty spectrum (then it has no
// text content).
380 mpa_outputStream->writeStartElement("peaks");
381 mpa_outputStream->writeAttribute("compressionType", "none");
382 mpa_outputStream->writeAttribute("compressedLen", "0");
383 mpa_outputStream->writeAttribute("precision", "64");
384 mpa_outputStream->writeAttribute("byteOrder", "network");
385 mpa_outputStream->writeAttribute("contentType", "m/z-int");
386
387 if((spectrum.getMassSpectrumCstSPtr().get() != nullptr) &&
388 (!spectrum.isEmptyMassSpectrum()))
389 {
// Peaks are serialised as consecutive (x, y) pairs of 8 bytes each. The
// literal 8 assumes DataPoint::x and DataPoint::y are 8-byte doubles -
// TODO confirm against the DataPoint declaration.
390 QByteArray byte_array;
// Little-endian hosts must byte-swap each value to obtain network (big
// endian) order; big-endian hosts can copy the bytes directly.
391 if(QSysInfo::ByteOrder == QSysInfo::LittleEndian)
392 {
393 for(const DataPoint &peak :
394 *(spectrum.getMassSpectrumCstSPtr().get()))
395 {
396 double swap = change_endian(peak.x);
397 byte_array.append((char *)&swap, 8);
398 swap = change_endian(peak.y);
399 byte_array.append((char *)&swap, 8);
400 }
401 }
402 else
403 {
404 for(const DataPoint &peak :
405 *(spectrum.getMassSpectrumCstSPtr().get()))
406 {
407 byte_array.append((char *)&peak.x, 8);
408 byte_array.append((char *)&peak.y, 8);
409 }
410 }
411 mpa_outputStream->writeCharacters(byte_array.toBase64());
412 }
413 mpa_outputStream->writeEndElement();
414
415 // scan
416 mpa_outputStream->writeEndElement();
417 qDebug();
418}
419
// Stores the MS1 masking flag. When it is true, the write() overload taking
// a read configuration does not request peak lists for MS level 1
// (it calls setNeedMsLevelPeakList(1, !m_ms1IsMasked)).
// NOTE(review): the line carrying the function name (421) is missing from
// this listing; the declaration index shows void maskMs1(bool mask_ms1).
420void
422{
423 m_ms1IsMasked = mask_ms1;
424}
std::size_t getSpectrumIndex() const
const QString & getNativeId() const
Base class to read an MS run. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition msrunreader.h:63
virtual std::size_t spectrumListSize() const =0
get the total number of spectra contained in the MS run data file
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)=0
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler)=0
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
const MsRunIdCstSPtr & getMsRunId() const
Translater(MzxmlOutput *p_mzxml_output)
virtual bool needPeakList() const override
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum) override
void setReadAhead(bool read_ahead)
std::size_t getScanNumberFromNativeId(const QString &native_id) const
MzxmlOutput(UiMonitorInterface &monitor, QIODevice *p_output_device)
void write(MsRunReader *p_msrunreader)
UiMonitorInterface & m_monitor
Definition mzxmloutput.h:91
std::size_t getScanNumber(const QualifiedMassSpectrum &spectrum) const
QXmlStreamWriter * mpa_outputStream
Definition mzxmloutput.h:92
void writeQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)
void maskMs1(bool mask_ms1)
std::size_t getPrecursorScanNumber(const QualifiedMassSpectrum &spectrum) const
void writeHeader(MsRunReader *p_msrunreader)
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
uint getPrecursorCharge(bool *ok=nullptr) const
get precursor charge
const QString & getPrecursorNativeId() const
pappso_double getPrecursorIntensity(bool *ok=nullptr) const
get precursor intensity
const MassSpectrumId & getMassSpectrumId() const
Get the MassSpectrumId.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
std::size_t getPrecursorSpectrumIndex() const
Get the scan number of the precursor ion.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
virtual void setReadAhead(bool is_read_ahead) final
use threads to read a spectrum by batch of batch_size
virtual void setNeedMsLevelPeakList(unsigned int ms_level, bool want_peak_list) final
tells if we need the peak list given
virtual void setTotalSteps(std::size_t total_number_of_steps)
use it if the number of steps is known in an algorithm; the total number of steps is useful to report...
#define PAPPSOMSPP_VERSION
Definition config.h:4
#define PAPPSOMSPP_NAME
Definition config.h:3
T change_endian(T in)
write msrun peaks into mzxml output stream
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
unsigned int uint
Definition types.h:57