libStatGen Software 1
Loading...
Searching...
No Matches
SamFileHeader.h
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __SAM_FILE_HEADER_H__
19#define __SAM_FILE_HEADER_H__
20
21#include <map>
22#include <stdint.h>
23
24#include "SamReferenceInfo.h"
25#include "SamHeaderHD.h"
26#include "SamHeaderSQ.h"
27#include "SamHeaderRG.h"
28#include "SamHeaderPG.h"
29
30/// This class allows a user to get/set the fields in a SAM/BAM Header.
31/// Sam/Bam headers contain comments and multiple SamHeaderRecords
32/// (HD, SQs, RGs, PGs) comprised of tag/value pairs with each tag only
33/// appearing once within a specific record.
35{
36public:
39
40 /////////////////////////////
41 /// @name Copying a Header
42 /// These methods are ways of copying the contents of one header into
43 /// another one.
44 //@{
45
46 /// Copy Constructor copies the specified header into this one.
47 SamFileHeader(const SamFileHeader& header);
48
49 /// Overload operator = to copy the passed in header into this header.
51
52 /// Copy method copies the passed in header into this header.
53 /// Returns true if at least one header line was successfully copied.
54 bool copy(const SamFileHeader& header);
55 //@}
56
57 /// Initialize the header.
58 void resetHeader();
59
60 /////////////////////////////
61 /// @name Get the Entire Header
62 /// Get the entire header as a single string.
63 //@{
64
65 /// Set the passed in string to the entire header string, clearing its
66 /// current contents.
67 /// \return true if successfully set (even if set to "")
68 bool getHeaderString(std::string& header) const;
69
70 //@}
71
72 /// Get the reference ID for the specified reference name (chromosome).
73 /// If addID is true, a reference id is created for the referenceName if
74 /// one does not already exist. If addID is false (default) and no id
75 /// exists for the referenceName, SamReferenceInfo::NO_REF_ID is returned.
76 int getReferenceID(const String & referenceName, bool addID = false);
77
78 /// Get the reference ID for the specified reference name (chromosome).
79 /// If addID is true, a reference id is created for the referenceName if
80 /// one does not already exist. If addID is false (default) and no id
81 /// exists for the referenceName, SamReferenceInfo::NO_REF_ID is returned.
82 int getReferenceID(const char* referenceName, bool addID = false);
83
84 /// Return the reference name (chromosome) for the specified reference id.
85 const String & getReferenceLabel(int id) const;
86
87 /// Get the Reference Information
89
90 // Get the Reference Information for updating separately when reading
91 // BAMs...should only be called by BamInterface.
92 SamReferenceInfo& getReferenceInfoForBamInterface();
93
94 ////////////////////////////////////////////////////////////////////////
95 // Set Values in the header
96 ////////////////////////////////////////////////////////////////////////
97
98 /////////////////////////////////////////
99 /// @name Adding an entire header/comment line.
100 /// These methods are ways of adding an entire header line at once.
101 //@{
102
103 /// Add a header line that is just one tag with a const char* value.
104 /// Note: This method will only do one tag per type on a line, so if a
105 /// type has multiple tags, the whole line needs to be added at once,
106 /// and a different method should be used.
107 bool addHeaderLine(const char* type, const char* tag, const char* value);
108
109 /// Add a header line that is already preformatted in a const char*.
110 /// Returns true if at least one header line was successfully added.
111 bool addHeaderLine(const char* headerLine);
112
113 /// Add a header that is already preformatted in a const char*.
114 /// Returns true if at least one header line was successfully added.
115 bool addHeader(const char* header);
116
117 /// Add the specified comment to the header (do not include "@CO" or "\n").
118 /// \return true if successfully added, false if not.
119 bool addComment(const char* comment);
120
121 //@}
122
123
124 /////////////////////////////////////////
125 /// @name Set/Add/Remove a Single Tag
126 /// The passed in tag should be the two character SAM tag as defined
127 /// in the SAM spec. A tag is removed from the header record by setting
128 /// it to "". For the SQ and RG header types, the key tags (SN for SQ
129 /// and ID for RG) may not be modified or removed once set. This is
130 /// because these values are used as a lookup key for the header record,
131 /// so the entire record must be removed.
132 //@{
133
134// // Set the specified header type tag to the specified value in the
135// // header with the specified keyID. keyID must be specified when
136// // type = SQ, RG, or PG.
137// bool setTag(SamHeaderRecord::SamHeaderRecordType type, const char* tag,
138// const char* value, const char* keyID = NULL);
139
140 /// Set the specified tag to the specified value in the HD header, remove
141 /// the tag by specifying value="".
142 /// \return true if the tag was successfully set, false if not.
143 bool setHDTag(const char* tag, const char* value);
144
145 /// Set the specified tag to the specified value in the SQ header with
146 /// the specified name, remove the tag by specifying value="". If the
147 /// header does not yet exist, the tag must be "LN" and the header is added
148 /// with the specified LN value and the SN value passed in name.
149 /// The SN & LN tags may not be modified or removed after they are
150 /// set unless the entire record is deleted.
151 /// \return true if the tag was successfully set, false if not.
152 bool setSQTag(const char* tag, const char* value, const char* name);
153
154 /// Set the specified tag to the specified value in the RG header with
155 /// the specified id, remove the tag by specifying value="". If the
156 /// header does not yet exist, the header is added and so is the ID tag
157 /// with the value set to the passed in id. The ID tag may not be
158 /// modified or removed after it is set unless the entire record is deleted.
159 /// \return true if the tag was successfully set, false if not.
160 bool setRGTag(const char* tag, const char* value, const char* id);
161
162 /// Set the specified tag to the specified value in the PG header with
163 /// the specified id, remove the tag by specifying value="". If the
164 /// header does not yet exist, the header is added and so is the ID tag
165 /// with the value set to the passed in id. The ID tag may not be
166 /// modified or removed after it is set unless the entire record is deleted.
167 /// \return true if the tag was successfully set, false if not.
168 bool setPGTag(const char* tag, const char* value, const char* id);
169
170 //@}
171
172 /////////////////////////////////////////
173 /// @name Add an Already Setup SamHeaderRecord
174 /// NOTE: These methods add a pointer to the passed in record.
175 /// The header record will be deleted when it's cleaned up from this header.
176 /// NOTE: Do NOT delete the passed in record, the SamFileHeader class
177 /// takes care of that itself.
178 //@{
179
180 /// Add the HD record to the header.
181 /// Note: it adds a pointer to the passed in header record. The header
182 /// record will be deleted when it is cleaned up from this header.
183 /// \return true if the record was successfully added, false otherwise.
184 bool addHD(SamHeaderHD* hd);
185
186 /// Add the SQ record to the header.
187 /// Note: it adds a pointer to the passed in header record. The header
188 /// record will be deleted when it is cleaned up from this header.
189 /// \return true if the record was successfully added, false otherwise.
190 bool addSQ(SamHeaderSQ* sq);
191
192 /// Add the RG record to the header.
193 /// Note: it adds a pointer to the passed in header record. The header
194 /// record will be deleted when it is cleaned up from this header.
195 /// \return true if the record was successfully added, false otherwise.
196 bool addRG(SamHeaderRG* rg);
197
198 /// Add the PG record to the header.
199 /// Note: it adds a pointer to the passed in header record. The header
200 /// record will be deleted when it is cleaned up from this header.
201 /// \return true if the record was successfully added, false otherwise.
202 bool addPG(SamHeaderPG* pg);
203
204 /// Add a copy of the specified header record to the header.
205 /// Note: it creates a new header record that is identical to the specified
206 /// one and adds it to the header. The passed in pointer will not be
207 /// deleted due to this.
208 /// \return true if the record was successfully added, false otherwise.
209 bool addRecordCopy(const SamHeaderRecord& hdrRec);
210
211 //@}
212
213 ////////////////////////////////////////////////////////////////////////
214 /// @name Remove an Entire Header Record
215 //@{
216
217 /// Remove the HD record.
218 /// \return true if successfully removed or did not exist, false if
219 /// the record still exists.
220 bool removeHD();
221
222 /// Remove SQ record with the specified key.
223 /// NOTE: Does not remove it from the BAM index.
224 /// \return true if successfully removed or did not exist, false if
225 /// the record still exists.
226 bool removeSQ(const char* name);
227
228 /// Remove RG record with the specified key.
229 /// \return true if successfully removed or did not exist, false if
230 /// the record still exists.
231 bool removeRG(const char* id);
232
233 /// Remove PG record with the specified key.
234 /// \return true if successfully removed or did not exist, false if
235 /// the record still exists.
236 bool removePG(const char* id);
237
238 //@}
239
240 ////////////////////////////////////////////////////////////////////////
241 /// @name Get a Specific Tag
242 /// These methods return the value associated with the specified tag.
243 /// If the tag does not exist in the record "" is returned.
244 ///
245 /// For SQ, RG, and PG the value returned is for the tag associated with
246 /// the specified key (name/id). If a record with that key does not exist
247 /// or if the tag does not exist for the record with that key, "" is
248 /// returned.
249 //@{
250
251 /// Returns the value associated with the specified HD tag, returning "" if
252 /// the tag does not exist in the header.
253 const char* getHDTagValue(const char* tag);
254
255 /// Get the value associated with the specified tag on the SQ line with
256 /// the specified sequence name, returning "" if the tag or key does
257 /// not exist.
258 const char* getSQTagValue(const char* tag, const char* name);
259
260 /// Get the value associated with the specified tag on the RG line with
261 /// the specified read group identifier, returning "" if the tag or key does
262 /// not exist.
263 const char* getRGTagValue(const char* tag, const char* id);
264
265 /// Get the value associated with the specified tag on the PG line with
266 /// the specified id, returning "" if the tag or key does
267 /// not exist.
268 const char* getPGTagValue(const char* tag, const char* id);
269
270 //@}
271
272 /// Get the number of SQ objects.
273 int getNumSQs();
274
275 /// Get the number of RG objects.
276 int getNumRGs();
277
278 /// Get the number of PG objects.
279 int getNumPGs();
280
281 ////////////////////////////////////////////////////////////////////////
282 /// @name Get a Specific Header Record
283 /// These methods return a pointer to the specific record that was
284 /// requested, returning NULL if that record does not exist in the header.
285 ///
286 /// The returned record can be modified to add/remove some tags.
287 /// Since a pointer is returned, the SamFileHeader automatically
288 /// reflects these changes.
289 //@{
290
291 /// Get the HD object, returning NULL if there is no HD record.
293
294 /// Get the SQ object with the specified sequence name, returning NULL
295 /// if there is no SQ object with that key.
296 SamHeaderSQ* getSQ(const char* name);
297
298 /// Get the RG object with the specified read group identifier, returning
299 /// NULL if there is no RG object with that key.
300 SamHeaderRG* getRG(const char* id);
301
302 /// Get the PG object with the specified id, returning NULL
303 /// if there is no PG object with that key.
304 SamHeaderPG* getPG(const char* id);
305
306 //@}
307
308// //////////////////////////////////
309// // Set methods for header fields.
310// bool setVersion(const char* version);
311// bool setSortOrder(const char* sortOrder);
312// bool addSequenceName(const char* sequenceName);
313// bool setSequenceLength(const char* keyID, int sequenceLength);
314// bool setGenomeAssemblyId(const char* keyID, const char* genomeAssemblyId);
315// bool setMD5Checksum(const char* keyID, const char* md5sum);
316// bool setURI(const char* keyID, const char* uri);
317// bool setSpecies(const char* keyID, const char* species);
318// bool addReadGroupID(const char* readGroupID);
319// bool setSample(const char* keyID, const char* sample);
320// bool setLibrary(const char* keyID, const char* library);
321// bool setDescription(const char* keyID, const char* description);
322// bool setPlatformUnit(const char* keyID, const char* platform);
323// bool setPredictedMedianInsertSize(const char* keyID, const char* isize);
324// bool setSequencingCenter(const char* keyID, const char* center);
325// bool setRunDate(const char* keyID, const char* runDate);
326// bool setTechnology(const char* keyID, const char* technology);
327// bool addProgram(const char* programID);
328// bool setProgramVersion(const char* keyID, const char* version);
329// bool setCommandLine(const char* keyID, const char* commandLine);
330
331// ///////////////////////////////////
332// // Get methods for header fields.
333// // Returns the number of SQ entries in the header.
334// int32_t getSequenceDictionaryCount();
335
336 /// Return the Sort Order value that is set in the Header, returning ""
337 /// if this field does not exist.
338 const char* getSortOrder();
339
340
341 /// DEPRECATED
342 const char* getTagSO();
343
344 /////////////////////////////
345 /// @name Get the Header Record/Comment/Line by Record/Comment/Line
346 /// These methods iterate through the header.
347 /// NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
348 /// same iterator. getNextHeaderRecord that takes a header type
349 /// uses the same iterator as the getNextXXRecord with that type.
350 /// Otherwise the iterators are independent.
351 //@{
352
353 /// Get the next SQ header record. After all SQ headers have been
354 /// retrieved, NULL is returned until a reset is called.
355 /// Independent from getNextHeaderRecord, getNextHeaderLine and the
356 /// other getNextXXRecord methods and the associated reset methods.
358
359 /// Get the next RG header record. After all RG headers have been
360 /// retrieved, NULL is returned until a reset is called.
361 /// Independent from getNextHeaderRecord, getNextHeaderLine and the
362 /// other getNextXXRecord methods and the associated reset methods.
364
365 /// Get the next PG header record. After all PG headers have been
366 /// retrieved, NULL is returned until a reset is called.
367 /// Independent from getNextHeaderRecord, getNextHeaderLine and the
368 /// other getNextXXRecord methods and the associated reset methods.
370
371 /// Reset to the beginning of the header records so the next call
372 /// to getNextSQRecord returns the first SQ header record.
373 void resetSQRecordIter();
374
375 /// Reset to the beginning of the header records so the next call
376 /// to getNextRGRecord returns the first RG header record.
377 void resetRGRecordIter();
378
379 /// Reset to the beginning of the header records so the next call
380 /// to getNextPGRecord returns the first PG header record.
381 void resetPGRecordIter();
382
383 /// Get the next header record of the specified type starting from the
384 /// specified index and update the index.
385 /// After all headers of that type have been retrieved,
386 /// NULL is returned until a reset is called for that type.
387 SamHeaderRecord* getNextHeaderRecord(uint32_t& index,
389
390 /// Get the next header record, but not comment line. After all headers
391 /// have been retrieved, NULL is returned until a reset is called.
392 /// NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
393 /// same iterator.
395
396 /// Set the passed in string to the next header line, overwriting
397 /// the passed in string. If there are no more header lines or there
398 /// is an error, false is returned and the passed in string is set to ""
399 /// until a reset is called.
400 /// NOTE: both getNextHeaderRecord and getNextHeaderLine increment the
401 /// same iterator.
402 bool getNextHeaderLine(std::string &headerLine);
403
404 /// Reset to the beginning of the header records so the next call
405 /// to getNextHeaderRecord returns the first header line.
407
408 /// Append all of the comment lines to the specified string.
409 void appendCommentLines(std::string &commentLines);
410
411 /// Returns the comment on the next comment line. Returns "" if all comment
412 /// lines have been returned, until resetCommentIter is called.
413 const char* getNextComment();
414
415 /// Resets to the beginning of the comments so getNextComment returns
416 /// the first comment.
417 void resetCommentIter();
418
419 //@}
420
421
422 /// Get the failure message if a method returned failure.
 /// \return myErrorMessage.c_str(), i.e. a C-string pointer into this header's stored message rather than a copy.
423 const char* getErrorMessage() { return(myErrorMessage.c_str()); }
424
425 static const std::string EMPTY_RETURN;
426
427private:
428 // Parse the header string.
429 bool parseHeader(String& header);
430
431 // Parse the specified line of the header.
432 bool parseHeaderLine(const String& headerLine);
433
434 // Set the passed in string to the header line at the specified index.
435 // It does NOT clear the current contents of header.
436 bool getHeaderLine(unsigned int index, std::string& header) const;
437
// Pack two characters into a 16-bit key: ch1 occupies the high byte and ch2
// the low byte (presumably a two-letter header type/tag -- confirm at callers).
// Both characters are widened through unsigned char first, so the key does not
// depend on whether plain char is signed on the target platform and a negative
// value is never left-shifted. For 7-bit ASCII input the result is identical to
// (ch1 << 8) + ch2.
int16_t makeKey(char ch1, char ch2)
{
    const uint16_t highByte = static_cast<unsigned char>(ch1);
    const uint16_t lowByte = static_cast<unsigned char>(ch2);
    // The two bytes never overlap, so OR-ing them is the same as adding them.
    return static_cast<int16_t>(static_cast<uint16_t>((highByte << 8) | lowByte));
}
442
443 // Only one HD type is allowed per file.
444 SamHeaderHD* myHD;
445
446 // There can be multiple SQ Types, indexed by SN.
447 StringHash mySQs;
448
449 // There can be multiple RG Types, indexed by ID.
450 StringHash myRGs;
451
452 // There can be multiple PG types, indexed by ID.
453 StringHash myPGs;
454
455 // Reference Name information
456 SamReferenceInfo myReferenceInfo;
457
458 // Vector of comments
459 std::vector<std::string> myComments;
460
461 std::vector<SamHeaderRecord*> myHeaderRecords;
462
463 std::string myErrorMessage;
464
465 uint32_t myCurrentSQIndex;
466
467 uint32_t myCurrentRGIndex;
468
469 uint32_t myCurrentPGIndex;
470
471 uint32_t myCurrentHeaderIndex;
472
473 uint32_t myCurrentCommentIndex;
474};
475
476#endif
477
This class allows a user to get/set the fields in a SAM/BAM Header.
SamHeaderPG * getPG(const char *id)
Get the PG object with the specified id, returning NULL if there is no PG object with that key.
bool addPG(SamHeaderPG *pg)
Add the PG record to the header.
const char * getErrorMessage()
Get the failure message if a method returned failure.
const char * getSortOrder()
Return the Sort Order value that is set in the Header, returning "" if this field does not exist.
const char * getSQTagValue(const char *tag, const char *name)
Get the value associated with the specified tag on the SQ line with the specified sequence name,...
SamHeaderSQ * getSQ(const char *name)
Get the SQ object with the specified sequence name, returning NULL if there is no SQ object with that...
SamHeaderHD * getHD()
Get the HD object, returning NULL if there is no HD record.
void resetRGRecordIter()
Reset to the beginning of the header records so the next call to getNextRGRecord returns the first RG...
bool setPGTag(const char *tag, const char *value, const char *id)
Set the specified tag to the specified value in the PG header with the specified id,...
const char * getHDTagValue(const char *tag)
Returns the value associated with the specified HD tag, returning "" if the tag does not exist in the...
bool addRG(SamHeaderRG *rg)
Add the RG record to the header.
bool getNextHeaderLine(std::string &headerLine)
Set the passed in string to the next header line, overwriting the passed in string.
SamHeaderRecord * getNextPGRecord()
Get the next PG header record.
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
bool removePG(const char *id)
Remove PG record with the specified key.
bool addSQ(SamHeaderSQ *sq)
Add the SQ record to the header.
int getNumSQs()
Get the number of SQ objects.
bool removeRG(const char *id)
Remove RG record with the specified key.
bool setSQTag(const char *tag, const char *value, const char *name)
Set the specified tag to the specified value in the SQ header with the specified name,...
bool addComment(const char *comment)
Add the specified comment to the header (do not include "@CO" or "\n").
int getNumRGs()
Get the number of RG objects.
const char * getTagSO()
DEPRECATED.
SamHeaderRecord * getNextHeaderRecord()
Get the next header record, but not comment line.
bool addHD(SamHeaderHD *hd)
Add the HD record to the header.
const char * getNextComment()
Returns the comment on the next comment line.
const char * getRGTagValue(const char *tag, const char *id)
Get the value associated with the specified tag on the RG line with the specified read group identifi...
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
bool addHeaderLine(const char *type, const char *tag, const char *value)
Add a header line that is just one tag with a const char* value.
const SamReferenceInfo & getReferenceInfo() const
Get the Reference Information.
bool removeHD()
Remove the HD record.
void resetSQRecordIter()
Reset to the beginning of the header records so the next call to getNextSQRecord returns the first SQ...
bool setRGTag(const char *tag, const char *value, const char *id)
Set the specified tag to the specified value in the RG header with the specified id,...
bool getHeaderString(std::string &header) const
Set the passed in string to the entire header string, clearing its current contents.
bool copy(const SamFileHeader &header)
Copy method copies the passed in header into this header.
void appendCommentLines(std::string &commentLines)
Append all of the comment lines to the specified string.
void resetHeaderRecordIter()
Reset to the beginning of the header records so the next call to getNextHeaderRecord returns the firs...
int getNumPGs()
Get the number of PG objects.
SamHeaderRecord * getNextRGRecord()
Get the next RG header record.
SamHeaderRecord * getNextSQRecord()
Get the next SQ header record.
void resetHeader()
Initialize the header.
bool removeSQ(const char *name)
Remove SQ record with the specified key.
const char * getPGTagValue(const char *tag, const char *id)
Get the value associated with the specified tag on the PG line with the specified id,...
bool addHeader(const char *header)
Add a header that is already preformatted in a const char*.
void resetPGRecordIter()
Reset to the beginning of the header records so the next call to getNextPGRecord returns the first PG...
SamHeaderRG * getRG(const char *id)
Get the RG object with the specified read group identifier, returning NULL if there is no RG object w...
bool addRecordCopy(const SamHeaderRecord &hdrRec)
Add a copy of the specified header record to the header.
bool setHDTag(const char *tag, const char *value)
Set the specified tag to the specified value in the HD header, remove the tag by specifying value="".
SamFileHeader & operator=(const SamFileHeader &header)
Overload operator = to copy the passed in header into this header.
void resetCommentIter()
Resets to the beginning of the comments so getNextComment returns the first comment.
This class encapsulates the tag value pairs contained with a SAM Header line with accessors for getti...
SamHeaderRecordType
Specifies the Type for the sam header record (line).
Class for tracking the reference information mapping between the reference ids and the reference name...