libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MSrun file reader based on the ProteoWizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43
44
45// int pwizMsRunReaderMetaTypeId =
46// qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49namespace pappso
50{
51
52
54 : MsRunReader(msrun_id_csp)
55{
56 // The initialization needs to be done immediately so that we get the pwiz
57 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
58 // pointer will be set to msp_msData.
59
60 initialize();
61}
62
63
64void
66{
67 std::string file_name_std =
69
70 // Make a backup of the current locale
71 std::string env_backup = setlocale(LC_ALL, "");
72 // struct lconv *lc = localeconv();
73
74 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
75 //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
76 //<< lc->decimal_point;
77
78 // Now actually search the useful MSDataPtr to the member variable.
79
80 pwiz::msdata::DefaultReaderList defaultReaderList;
81
82 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
83
84 try
85 {
86 defaultReaderList.read(file_name_std, msDataPtrVector);
87 }
88 catch(std::exception &error)
89 {
90 qDebug() << QString("Failed to read the data from file %1")
91 .arg(QString::fromStdString(file_name_std));
92
93 throw(PappsoException(
94 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
95 .arg(mcsp_msRunId->getFileName())
96 .arg(mcsp_msRunId.get()->toString())
97 .arg(error.what())));
98 }
99
100 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
101 //<< "The number of runs is:" << msDataPtrVector.size()
102 //<< "The number of spectra in first run is:"
103 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
104
105 // Single-run file handling here.
106
107 // Specific case of the MGF data format: we do not have a run id for that kind
108 // of data. In this case there must be a single run!
109
110 if(mcsp_msRunId->getRunId().isEmpty())
111 {
112 if(msDataPtrVector.size() != 1)
113 throw(
114 ExceptionNotPossible("For the kind of file at hand there can only be "
115 "one run in the file."));
116
117 // At this point we know the single msDataPtr is the one we are looking
118 // for.
119
120 msp_msData = msDataPtrVector.front();
121 }
122 else
123 {
124 // Multi-run file handling here.
125 for(auto &msDataPtr : msDataPtrVector)
126 {
127 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
128 {
129 msp_msData = msDataPtr;
130
131 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
132 //<< "Found the right MSDataPtr for run id.";
133
134 break;
135 }
136 }
137 }
138
139 if(msp_msData == nullptr)
140 {
142 QString("Could not find a MSDataPtr matching the requested run id : %1")
143 .arg(mcsp_msRunId.get()->toString())));
144 }
145
146
147 // check if this MS run can be used with scan numbers
148 // MS:1000490 Agilent instrument model
149 pwiz::cv::CVID native_id_format =
150 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
151
152 // msp_msData.get()->getDefaultNativeIDFormat();
153
154 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
155 {
156 m_hasScanNumbers = true;
157 }
158 else
159 {
160 m_hasScanNumbers = false;
161 }
162
163 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::mzXML)
164 {
165 m_hasScanNumbers = true;
166 }
167}
168
169
173
174
175pwiz::msdata::SpectrumPtr
176PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
177 std::size_t spectrum_index,
178 bool want_binary_data) const
179{
180 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
181
182 try
183 {
184 native_pwiz_spectrum_sp =
185 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
186 }
187 catch(std::runtime_error &error)
188 {
189 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
190 << typeid(error).name();
191
192 throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
193 "MS file std::runtime_error :\n%2")
194 .arg(spectrum_index)
195 .arg(error.what()));
196 }
197 catch(std::exception &error)
198 {
199 qDebug() << "getPwizSpectrumPtr error " << error.what()
200 << typeid(error).name();
201
202 throw ExceptionNotFound(
203 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
204 .arg(spectrum_index)
205 .arg(error.what()));
206 }
207
208 if(native_pwiz_spectrum_sp.get() == nullptr)
209 {
210 throw ExceptionNotFound(
211 QObject::tr(
212 "Pwiz spectrum index %1 not found in MS file : null pointer")
213 .arg(spectrum_index));
214 }
215
216 return native_pwiz_spectrum_sp;
217}
218
219
220bool
222 pwiz::msdata::Spectrum *spectrum_p,
223 QualifiedMassSpectrum &qualified_mass_spectrum) const
224{
225
226 // We now have to set the retention time at which this mass spectrum
227 // was acquired. This is the scan start time.
228
229 if(!spectrum_p->scanList.scans[0].hasCVParam(
230 pwiz::msdata::MS_scan_start_time))
231 {
232 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
233 { // MGF could not have scan start time
234 qualified_mass_spectrum.setRtInSeconds(-1);
235 }
236 else
237 {
239 "The spectrum has no scan start time value set."));
240 }
241 }
242 else
243 {
244 pwiz::data::CVParam retention_time_cv_param =
245 spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
246
247 // Try to get the units of the retention time value.
248
249 std::string unit_name = retention_time_cv_param.unitsName();
250 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
251 //<< "Unit name for the retention time:"
252 //<< QString::fromStdString(unit_name);
253
254 if(unit_name == "second")
255 {
256 qualified_mass_spectrum.setRtInSeconds(
257 retention_time_cv_param.valueAs<double>());
258 }
259 else if(unit_name == "minute")
260 {
261 qualified_mass_spectrum.setRtInSeconds(
262 retention_time_cv_param.valueAs<double>() * 60);
263 }
264 else
265 throw(
266 ExceptionNotPossible("Could not determine the unit for the "
267 "scan start time value."));
268 }
269
270 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
271 //<< "Retention time for spectrum is:"
272 //<< qualified_mass_spectrum.getRtInSeconds();
273
274 // Old version not checking unit (by default unit is minutes for RT,
275 // not seconds)
276 //
277 // pappso_double retentionTime =
278 // QString(spectrum_p->scanList.scans[0]
279 //.cvParam(pwiz::msdata::MS_scan_start_time)
280 //.value.c_str())
281 //.toDouble();
282 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
283
284 return true;
285}
286
287
288bool
290 pwiz::msdata::Spectrum *spectrum_p,
291 QualifiedMassSpectrum &qualified_mass_spectrum) const
292{
293 // Not all the acquisitions have ion mobility data. We need to test
294 // that:
295
296 if(spectrum_p->scanList.scans[0].hasCVParam(
297 pwiz::msdata::MS_ion_mobility_drift_time))
298 {
299
300 // qDebug() << "as strings:"
301 //<< QString::fromStdString(
302 // spectrum_p->scanList.scans[0]
303 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
304 //.valueAs<std::string>());
305
306 pappso_double driftTime =
307 spectrum_p->scanList.scans[0]
308 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
309 .valueAs<double>();
310
311 // qDebug() << "driftTime:" << driftTime;
312
313 // Old version requiring use of QString.
314 // pappso_double driftTime =
315 // QString(spectrum_p->scanList.scans[0]
316 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
317 //.value.c_str())
318 //.toDouble();
319
320 // Now make positively sure that the obtained value is correct.
321 // Note that I suffered a lot with Waters Synapt data that
322 // contained apparently correct drift time XML element that in
323 // fact contained either NaN or inf. When such mass spectra were
324 // encountered, the mz,i data were bogus and crashed the data
325 // loading functions. We just want to skip this kind of bogus mass
326 // spectrum by letting the caller know that the drift time was
327 // bogus ("I" is Filippo Rusconi).
328
329 if(std::isnan(driftTime) || std::isinf(driftTime))
330 {
331 // qDebug() << "detected as nan or inf.";
332
333 return false;
334 }
335 else
336 {
337 // The mzML standard stipulates that drift times are in
338 // milliseconds.
339 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
340 }
341 }
342 // End of
343 // if(spectrum_p->scanList.scans[0].hasCVParam(
344 // pwiz::msdata::MS_ion_mobility_drift_time))
345 else
346 {
347 // Not a bogus mass spectrum but also not a drift spectrum, set -1
348 // as the drift time value.
349 qualified_mass_spectrum.setDtInMilliSeconds(-1);
350 }
351
352 return true;
353}
354
355
358 const MassSpectrumId &massSpectrumId,
359 pwiz::msdata::Spectrum *spectrum_p,
360 bool want_binary_data,
361 bool &ok) const
362{
363 // qDebug();
364
365 std::string env;
366 env = setlocale(LC_ALL, "");
367 setlocale(LC_ALL, "C");
368
369 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
370
371 try
372 {
373
374 // We want to store the ms level for this spectrum
375
376 int msLevel =
377 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
378
379 qualified_mass_spectrum.setMsLevel(msLevel);
380
381 if(!spectrum_p->scanList.scans[0].hasCVParam(
382 pwiz::msdata::MS_peak_list_scans))
383 {
384
385 qDebug() << spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
386 .valueAs<double>();
387 qualified_mass_spectrum.setParameterValue(
389 spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
390 .valueAs<double>());
391 }
392 // We want to know if this spectrum is a fragmentation spectrum obtained
393 // from a selected precursor ion.
394
395 std::size_t precursor_list_size = spectrum_p->precursors.size();
396
397 // qDebug() << "For spectrum at index:" <<
398 // massSpectrumId.getSpectrumIndex()
399 //<< "msLevel:" << msLevel
400 //<< "with number of precursors:" << precursor_list_size;
401
402 if(precursor_list_size > 0)
403 {
404
405 // Sanity check
406 if(msLevel < 2)
407 {
408 qDebug() << "Going to throw: msLevel cannot be less than two for "
409 "a spectrum that has items in its Precursor list.";
410
412 "msLevel cannot be less than two for "
413 "a spectrum that has items in its Precursor list."));
414 }
415
416 // See what is the first precursor in the list.
417
418 for(auto &precursor : spectrum_p->precursors)
419 {
420
421 // Set this variable ready as we need that default value in
422 // certain circumstances.
423
424 std::size_t precursor_spectrum_index =
425 std::numeric_limits<std::size_t>::max();
426
427 // The spectrum ID of the precursor might be empty.
428
429 if(precursor.spectrumID.empty())
430 {
431 // qDebug() << "The precursor's spectrum ID is empty.";
432
433 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
434 {
435 // qDebug()
436 //<< "Format is MGF, precursor's spectrum ID can be
437 // empty.";
438 }
439 else
440 {
441 // When performing Lumos Fusion fragmentation experiments
442 // in Tune mode and with recording, the first spectrum of
443 // the list is a fragmentation spectrum (ms level 2) that
444 // has no identity for the precursor spectrum because
445 // there is no full scan acquisition.
446 }
447 }
448 // End of
449 // if(precursor.spectrumID.empty())
450 else
451 {
452 // We could get a native precursor spectrum id, so convert
453 // that native id to a spectrum index.
454
455 qualified_mass_spectrum.setPrecursorNativeId(
456 QString::fromStdString(precursor.spectrumID));
457
458 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
459 {
460 // qDebug() << "The native id of the precursor spectrum is
461 // empty.";
462 }
463
464 // Get the spectrum index of the spectrum that contained the
465 // precursor ion.
466
467 precursor_spectrum_index =
468 msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
469
470 // Note that the Mascot MGF format has a peculiar handling of
471 // the precursor ion stuff so we cannot throw.
472 if(precursor_spectrum_index ==
473 msp_msData->run.spectrumListPtr->size())
474 {
475 if(mcsp_msRunId.get()->getMzFormat() != MzFormat::MGF)
476 {
478 "Failed to find the index of the "
479 "precursor ion's spectrum."));
480 }
481 }
482
483 qualified_mass_spectrum.setPrecursorSpectrumIndex(
484 precursor_spectrum_index);
485
486 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
487 // "()"
488 //<< "Set the precursor spectrum index to:"
489 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
490 //<< "for qualified mass spectrum:"
491 //<< &qualified_mass_spectrum;
492 }
493
494 if(!precursor.selectedIons.size())
495 {
496 qDebug()
497 << "Going to throw The spectrum has msLevel > 1 but the "
498 "precursor ions's selected ions list is empty..";
499
500 throw(
501 ExceptionNotPossible("The spectrum has msLevel > 1 but the "
502 "precursor ions's selected ions "
503 "list is empty."));
504 }
505
506 pwiz::msdata::SelectedIon &ion =
507 *(precursor.selectedIons.begin());
508
509 // selected ion m/z
510
511 pappso_double selected_ion_mz =
512 QString(
513 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
514 .toDouble();
515
516 // selected ion peak intensity
517
518 pappso_double selected_ion_peak_intensity =
519 QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
520 .toDouble();
521
522 // charge state
523
524 unsigned int selected_ion_charge_state =
525 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
526 .toUInt();
527
528 // At this point we can craft a new PrecursorIonData instance and
529 // push it back to the vector.
530
531 PrecursorIonData precursor_ion_data(selected_ion_mz,
532 selected_ion_charge_state,
533 selected_ion_peak_intensity);
534
535 qualified_mass_spectrum.appendPrecursorIonData(
536 precursor_ion_data);
537
538 // General sum-up
539
540 // qDebug()
541 //<< "Appended new PrecursorIonData:"
542 //<< "mz:"
543 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
544 //<< "charge:"
545 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
546 //<< "intensity:"
547 //<< qualified_mass_spectrum.getPrecursorIonData()
548 //.back()
549 //.intensity;
550 }
551 // End of
552 // for(auto &precursor : spectrum_p->precursors)
553 }
554 // End of
555 // if(precursor_list_size > 0)
556 else
557 {
558 // Sanity check
559
560 // Unfortunately, logic here is defeated by some vendors that have
561 // files with MS2 spectra without <precursorList>. Thus we have
562 // spectrum_p->precursors.size() == 0 and msLevel > 1.
563
564 // if(msLevel != 1)
565 //{
566 // throw(
567 // ExceptionNotPossible("msLevel cannot be different than 1 if "
568 //"there is not a single precursor ion."));
569 //}
570 }
571
572 // Sanity check.
573
574 if(precursor_list_size !=
575 qualified_mass_spectrum.getPrecursorIonData().size())
576 {
577 qDebug() << "Going to throw The number of precursors in the file is "
578 "different from the number of precursors in memory.";
579
581 QObject::tr("The number of precursors in the file is different "
582 "from the number of precursors in memory."));
583 }
584
585 // if(precursor_list_size == 1)
586 //{
587 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
588 //<< qualified_mass_spectrum.getPrecursorMz();
589 //}
590
591 processRetentionTime(spectrum_p, qualified_mass_spectrum);
592
593 processDriftTime(spectrum_p, qualified_mass_spectrum);
594
595 // for(pwiz::data::CVParam cv_param : ion.cvParams)
596 //{
597 // pwiz::msdata::CVID param_id = cv_param.cvid;
598 // qDebug() << param_id;
599 // qDebug() << cv_param.cvid.c_str();
600 // qDebug() << cv_param.name().c_str();
601 // qDebug() << cv_param.value.c_str();
602 //}
603
604 if(want_binary_data)
605 {
606
607 // Fill-in MZIntensityPair vector for convenient access to binary
608 // data
609
610 std::vector<pwiz::msdata::MZIntensityPair> pairs;
611 spectrum_p->getMZIntensityPairs(pairs);
612
613 MassSpectrum spectrum;
614 double tic = 0;
615 // std::size_t iterCount = 0;
616
617 // Iterate through the m/z-intensity pairs
618 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
619 it = pairs.begin(),
620 end = pairs.end();
621 it != end;
622 ++it)
623 {
624 //++iterCount;
625
626 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
627 // it->intensity;
628 if(it->intensity)
629 {
630 spectrum.push_back(DataPoint(it->mz, it->intensity));
631 tic += it->intensity;
632 }
633 }
634
635 if(mcsp_msRunId.get()->getMzFormat() == MzFormat::MGF)
636 {
637 // Sort peaks by mz
638 spectrum.sortMz();
639 }
640
641 // lc = localeconv ();
642 // qDebug() << " env=" << localeconv () << " lc->decimal_point "
643 // << lc->decimal_point;
644 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
645 // "<< spectrum.size();
646 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
647 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
648
649 // double sumY =
650 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
651 // <<
652 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
653 //<< "iterCount:" << iterCount << "Spectrum size "
654 //<< spectrum.size() << "with tic:" << tic
655 //<< "and sumY:" << sumY;
656 }
657 else
658 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
659 }
660 catch(PappsoException &errorp)
661 {
662 qDebug() << "Going to throw";
663
665 QObject::tr("Error reading data using the proteowizard library: %1")
666 .arg(errorp.qwhat()));
667 }
668 catch(std::exception &error)
669 {
670 qDebug() << "Going to throw";
671
673 QObject::tr("Error reading data using the proteowizard library: %1")
674 .arg(error.what()));
675 }
676
677 // setlocale(LC_ALL, env.c_str());
678
679 ok = true;
680
681 // qDebug() << "QualifiedMassSpectrum: " <<
682 // qualified_mass_spectrum.toString();
683 return qualified_mass_spectrum;
684}
685
686
689 bool want_binary_data,
690 bool &ok) const
691{
692
693 std::string env;
694 env = setlocale(LC_ALL, "");
695 // struct lconv *lc = localeconv();
696
697 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
698 //<< "env=" << env.c_str()
699 //<< "lc->decimal_point:" << lc->decimal_point;
700
701 setlocale(LC_ALL, "C");
702
703 MassSpectrumId massSpectrumId(mcsp_msRunId);
704
705 if(msp_msData == nullptr)
706 {
707 setlocale(LC_ALL, env.c_str());
708 return (QualifiedMassSpectrum(massSpectrumId));
709 }
710
711 // const bool want_binary_data = true;
712
713 pwiz::msdata::SpectrumListPtr spectrum_list_p =
714 msp_msData->run.spectrumListPtr;
715
716 if(spectrum_index == spectrum_list_p.get()->size())
717 {
718 setlocale(LC_ALL, env.c_str());
719 throw ExceptionNotFound(
720 QObject::tr("The spectrum index cannot be equal to the size of the "
721 "spectrum list."));
722 }
723
724 // At this point we know the spectrum index might be sane, so store it in
725 // the mass spec id object.
726 massSpectrumId.setSpectrumIndex(spectrum_index);
727
728 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
729 getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
730
731 setlocale(LC_ALL, env.c_str());
732
733 massSpectrumId.setNativeId(
734 QString::fromStdString(native_pwiz_spectrum_sp->id));
735
737 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
738}
739
740
741bool
742PwizMsRunReader::accept(const QString &file_name) const
743{
744 // We want to know if we can handle the file_name.
745 pwiz::msdata::ReaderList reader_list;
746
747 std::string reader_type = reader_list.identify(file_name.toStdString());
748
749 if(!reader_type.empty())
750 return true;
751
752 return false;
753}
754
755
757PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
758{
759 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
760 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
761}
762
764PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
765{
766 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
767 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
768}
769
771PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
772 bool want_binary_data) const
773{
774
775 QualifiedMassSpectrum spectrum;
776 bool ok = false;
777
778 spectrum =
779 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
780
781 if(mcsp_msRunId->getMzFormat() == pappso::MzFormat::MGF)
782 {
783 if(spectrum.getRtInSeconds() == 0)
784 {
785 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
786 }
787 }
788
789 // if(!ok)
790 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
791
792 return spectrum;
793}
794
795
796void
802
803void
805 [[maybe_unused]] const MsRunReadConfig &config,
807{
808 qDebug();
810}
811
812void
814 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
815{
816
818 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
819
820 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
821 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
822 // spectrum has been fully qualified (that is, the member data have been
823 // set), it is transferred to the handler passed as parameter to this
824 // function for the consumer to do what it wants with it.
825
826 // Does the handler consuming the mass spectra read from file want these
827 // mass spectra to hold the binary data arrays (mz/i vectors)?
828
829 const bool want_binary_data = handler.needPeakList();
830
831
832 std::string env;
833 env = setlocale(LC_ALL, "");
834 setlocale(LC_ALL, "C");
835
836
837 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
838 // run member of msp_msData.
839
840 pwiz::msdata::SpectrumListPtr spectrum_list_p =
841 msp_msData->run.spectrumListPtr;
842
843 // We'll need it to perform the looping in the spectrum list.
844 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
845
846 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
847
848 // Inform the handler of the spectrum list so that it can handle feedback to
849 // the user.
850 handler.spectrumListHasSize(spectrum_list_size);
851
852 // Iterate in the full list of spectra.
853
854 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
855 {
856
857 // If the user of this reader instance wants to stop reading the
858 // spectra, then break this loop.
859 if(handler.shouldStop())
860 {
861 qDebug() << "The operation was cancelled. Breaking the loop.";
862 break;
863 }
864
865 // Get the native pwiz-spectrum from the spectrum list.
866 // Note that this pointer is a shared pointer from pwiz.
867
868 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
869 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
870
871 /*
872 * we want to load metadata of the spectrum even if it does not contain
873 peaks
874
875 * if(!native_pwiz_spectrum_sp->hasBinaryData())
876 {
877 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
878 "
879 ()"
880 //<< "native pwiz spectrum is empty, continuing.";
881 continue;
882 }
883 */
884
885 // Instantiate the mass spectrum id that will hold critical information
886 // like the native id string and the spectrum index.
887
888 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
889
890 // Get the spectrum native id as a QString to store it in the mass
891 // spectrum id class. This will later allow referring to the same
892 // spectrum starting back from the file.
893
894 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
895 massSpectrumId.setNativeId(native_id);
896
897 // Finally, instantiate the qualified mass spectrum with its id. This
898 // function will continue performing pappso-spectrum detailed
899 // qualification.
900
901 bool ok = false;
902
903 QualifiedMassSpectrum qualified_mass_spectrum =
905 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
906
907 if(!ok)
908 {
909 // qDebug() << "Encountered a mass spectrum for which the returned "
910 //"status is bad.";
911 continue;
912 }
913
914 // Before handing the mass spectrum out to the handler, see if the
915 // native mass spectrum was empty or not.
916
917 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
918 // qDebug() << "The mass spectrum has not defaultArrayLength";
919
920 qualified_mass_spectrum.setEmptyMassSpectrum(
921 !native_pwiz_spectrum_sp->defaultArrayLength);
922
923 // The handler will receive the index of the mass spectrum in the
924 // current run via the mass spectrum id member datum.
925 if(ms_level == 0)
926 {
927 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
928 }
929 else
930 {
931 if(qualified_mass_spectrum.getMsLevel() == ms_level)
932 {
933 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
934 }
935 }
936 }
937
938 setlocale(LC_ALL, env.c_str());
939 // End of
940 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
941
942 // Now let the loading handler know that the loading of the data has ended.
943 // The handler might need this "signal" to perform additional tasks or to
944 // cleanup cruft.
945
946 // qDebug() << "Loading ended";
947 handler.loadingEnded();
948}
949
950void
953{
954 qDebug();
956 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
957
958 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
959 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
960 // spectrum has been fully qualified (that is, the member data have been
961 // set), it is transferred to the handler passed as parameter to this
962 // function for the consumer to do what it wants with it.
963
964 // Does the handler consuming the mass spectra read from file want these
965 // mass spectra to hold the binary data arrays (mz/i vectors)?
966
967 const bool want_binary_data = config.needPeakList();
968
969
970 std::string env;
971 env = setlocale(LC_ALL, "");
972 setlocale(LC_ALL, "C");
973
974
975 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
976 // run member of msp_msData.
977
978 pwiz::msdata::SpectrumListPtr spectrum_list_p =
979 msp_msData->run.spectrumListPtr;
980
981 // We'll need it to perform the looping in the spectrum list.
982 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
983
984 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
985
986 // Inform the handler of the spectrum list so that it can handle feedback to
987 // the user.
988 handler.spectrumListHasSize(spectrum_list_size);
989
990 // Iterate in the full list of spectra.
991
992 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
993 {
994
995
996 // If the user of this reader instance wants to stop reading the
997 // spectra, then break this loop.
998 if(handler.shouldStop())
999 {
1000 qDebug() << "The operation was cancelled. Breaking the loop.";
1001 break;
1002 }
1003
1004 // Get the native pwiz-spectrum from the spectrum list.
1005 // Note that this pointer is a shared pointer from pwiz.
1006
1007 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
1008 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
1009
1010 /*
1011 * we want to load metadata of the spectrum even if it does not contain
1012 peaks
1013
1014 * if(!native_pwiz_spectrum_sp->hasBinaryData())
1015 {
1016 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
1017 "
1018 ()"
1019 //<< "native pwiz spectrum is empty, continuing.";
1020 continue;
1021 }
1022 */
1023
1024 // Instantiate the mass spectrum id that will hold critical information
1025 // like the native id string and the spectrum index.
1026
1027 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
1028
1029 // Get the spectrum native id as a QString to store it in the mass
1030 // spectrum id class. This will later allow referring to the same
1031 // spectrum starting back from the file.
1032
1033 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
1034 massSpectrumId.setNativeId(native_id);
1035
1036 // Finally, instantiate the qualified mass spectrum with its id. This
1037 // function will continue performing pappso-spectrum detailed
1038 // qualification.
1039
1040 bool ok = false;
1041
1042 QualifiedMassSpectrum qualified_mass_spectrum =
1044 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
1045
1046 if(!ok)
1047 {
1048 // qDebug() << "Encountered a mass spectrum for which the returned "
1049 //"status is bad.";
1050 continue;
1051 }
1052
1053 // Before handing the mass spectrum out to the handler, see if the
1054 // native mass spectrum was empty or not.
1055
1056 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
1057 // qDebug() << "The mass spectrum has not defaultArrayLength";
1058
1059 qualified_mass_spectrum.setEmptyMassSpectrum(
1060 !native_pwiz_spectrum_sp->defaultArrayLength);
1061
1062 // The handler will receive the index of the mass spectrum in the
1063 // current run via the mass spectrum id member datum.
1064
1065 qDebug();
1066 if(config.acceptMsLevel(qualified_mass_spectrum.getMsLevel()))
1067 {
1069 qualified_mass_spectrum.getRtInSeconds()))
1070 {
1071 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
1072 }
1073 }
1074 }
1075
1076 setlocale(LC_ALL, env.c_str());
1077 // End of
1078 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1079
1080 // Now let the loading handler know that the loading of the data has ended.
1081 // The handler might need this "signal" to perform additional tasks or to
1082 // cleanup cruft.
1083
1084 // qDebug() << "Loading ended";
1085 handler.loadingEnded();
1086}
1087
1088std::size_t
1090{
1091 return msp_msData->run.spectrumListPtr.get()->size();
1092}
1093
1094bool
1096{
1097 return m_hasScanNumbers;
1098}
1099
1100bool
1102{
1103 msp_msData = nullptr;
1104 return true;
1105}
1106
1107bool
1109{
1110 if(msp_msData == nullptr)
1111 {
1112 initialize();
1113 }
1114 return true;
1115}
1116
1117
1120 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
1121{
1122
1123 QualifiedMassSpectrum mass_spectrum =
1124 qualifiedMassSpectrum(spectrum_index, false);
1125
1126 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
1127}
1128
1131 const pappso::QualifiedMassSpectrum &mass_spectrum,
1132 pappso::PrecisionPtr precision) const
1133{
1134 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
1135
1136 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
1137
1138 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
1139
1140 return xic_coord;
1141}
1142
1143} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
bool acceptMsLevel(std::size_t ms_level) const
bool acceptRetentionTimeInSeconds(double retention_time_in_seconds) const
Base class to read an MS run. The only way to build a MsRunReader object is to use the MsRunReaderFactory.
Definition msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
virtual void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum object given its spectrum index
virtual bool hasScanNumbers() const override
Tells if spectra can be accessed using scan numbers. By default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
Tells if the reader is able to handle this file. Must be implemented by private MS run readers,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
Release the data back-end device. If the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setParameterValue(QualifiedMassSpectrumParameter parameter, const QVariant &value)
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectra from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition utils.cpp:143
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
@ ScanNumber
scan number: Ordinal number of the scan indicating its order of acquisition within a mass spectrometr...
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:43
MSrun file reader based on the ProteoWizard library.