18 #ifndef __SAM_RECORD_H__
19 #define __SAM_RECORD_H__
23 #include "GenomeSequence.h"
24 #include "SamStatus.h"
26 #include "MathVector.h"
27 #include "StringArray.h"
29 #include "SamFileHeader.h"
30 #include "CigarRoller.h"
37 int32_t myReferenceID;
39 uint32_t myReadNameLength : 8, myMapQuality : 8, myBin : 16;
40 uint32_t myCigarLength : 16, myFlag : 16;
42 int32_t myMateReferenceID;
43 int32_t myMatePosition;
120 const char* referenceName);
164 const char* mateReferenceName);
/// Add the specified integer tag to the record.
/// \param tag   name of the tag to add.
/// \param value integer value to store for the tag.
/// \return bool; the meaning of true/false is not shown in this excerpt
///         (presumably success/failure -- confirm against the definition).
234 bool addIntTag(
const char* tag, int32_t value);
/// Add the specified tag, vtype, value to the record.
/// \param tag   name of the tag to add.
/// \param vtype type character of the value being added.
/// \param value value for the tag, passed as a C string.
/// \return bool; the meaning of true/false is not shown in this excerpt
///         (presumably success/failure -- confirm against the definition).
244 bool addTag(
const char* tag,
char vtype,
const char* value);
/// Remove a tag.
/// \param tag  name of the tag to remove.
/// \param type type character associated with the tag to remove.
/// \return bool; the meaning of true/false is not shown in this excerpt
///         (presumably success/failure -- confirm against the definition).
254 bool rmTag(
const char* tag,
char type);
/// Remove tags.
/// \param tags C string identifying the tags to remove.
///             NOTE(review): the expected format of this string is not
///             visible in this excerpt -- confirm against the definition.
/// \return bool; the meaning of true/false is not shown in this excerpt.
264 bool rmTags(
const char* tags);
/// Check if the specified tag contains a value of the specified vtype.
/// \param tag  name of the tag to check.
/// \param type vtype character the tag's value is checked against.
/// \return bool result of the check.
620 bool checkTag(
const char * tag,
char type);
/// Combine two characters and a type character into a single int key.
/// The key is the sum of three parts: getKeyType(type) shifted left by
/// 16 bits, ch2 shifted left by 8 bits, and ch1 added unshifted.
/// NOTE(review): presumably this is the key consumed by getTag() and
/// getTypeFromKey() -- confirm against their definitions.
/// \param ch1  character added unshifted.
/// \param ch2  character shifted left by 8 bits.
/// \param type type character; it is mapped through getKeyType() before
///             being shifted left by 16 bits.
/// \return the combined integer key.
629 static int MAKEKEY(
char ch1,
char ch2,
char type)
630 {
return (getKeyType(type) << 16) + (ch2 << 8) + ch1; }
632 static char getKeyType(
char type)
652 static inline int getNumericTagTypeSize(
char type)
679 bool allocateRecordStructure(
int size);
681 void* getStringPtr(
int offset);
682 void* getIntegerPtr(
int offset,
char& vtype);
683 void* getFloatPtr(
int offset);
693 void setSequenceAndQualityFromBuffer();
702 bool parseCigarBinary();
706 bool parseCigarString();
710 bool setTagsFromBuffer();
714 bool setTagsInBuffer();
718 void getTypeFromKey(
int key,
char& type)
const;
719 void getTag(
int key,
char* tag)
const;
723 const char & getIntegerType(
int offset)
const;
724 float & getFloat(
int offset);
728 inline void appendIntArrayValue(
int index,
String& strVal)
const
730 appendIntArrayValue(intType[index], integers[index], strVal);
733 void appendIntArrayValue(
char type,
int value,
String& strVal)
const;
735 int getBtagBufferSize(
String& tagStr);
736 int setBtagBuffer(
String& tagStr,
char* extraPtr);
737 int getStringFromBtagBuffer(
unsigned char* buffer,
String& tagStr);
739 static const int DEFAULT_BLOCK_SIZE = 40;
740 static const int DEFAULT_BIN = 4680;
741 static const int DEFAULT_READ_NAME_LENGTH = 8;
742 static const char* DEFAULT_READ_NAME;
743 static const char* FIELD_ABSENT_STRING;
750 uint32_t* myCigarTempBuffer;
753 int myCigarTempBufferAllocatedSize;
756 int myCigarTempBufferLength;
762 bool myIsBufferSynced;
765 bool myNeedToSetTagsFromBuffer;
770 bool myNeedToSetTagsInBuffer;
777 String myMateReferenceName;
782 std::string mySeqWithEq;
783 std::string mySeqWithoutEq;
786 int32_t myAlignmentLength;
788 int32_t myUnclippedStartOffset;
789 int32_t myUnclippedEndOffset;
801 std::vector<char> intType;
802 std::vector<float> floats;
807 bool myIsReadNameBufferValid;
808 bool myIsCigarBufferValid;
809 bool myIsSequenceBufferValid;
810 bool myIsQualityBufferValid;
811 bool myIsTagsBufferValid;
814 unsigned char* myPackedSequence;
815 unsigned char* myPackedQuality;
831 String NOT_FOUND_TAG_STRING;
832 int NOT_FOUND_TAG_INT;
834 static const int myMaxWarns = 5;
835 static int myNumWarns;
bool setMapQuality(uint8_t mapQuality)
Set the mapping quality (MAPQ).
This class represents the CIGAR without any methods to set the cigar (see CigarRoller for that).
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
int32_t get0BasedPosition()
Get the 0-based (BAM) leftmost position of the record.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
int32_t get1BasedUnclippedStart()
Returns the 1-based inclusive left-most position adjusted for clipped bases.
int & getInteger(const char *tag)
Get the integer value for the specified tag. DEPRECATED: use getIntegerTag that returns a bool.
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
bool setSequence(const char *seq)
Sets the sequence (SEQ) to the specified SAM formatted sequence string.
uint16_t getBin()
Get the BAM bin for the record.
int32_t getMateReferenceID()
Get the mate reference id of the record (BAM format: mate_rid/next_refID).
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
bool setMateReferenceName(SamFileHeader &header, const char *mateReferenceName)
Set the mate/next fragment's reference sequence name (RNEXT) to the specified name,...
SamStatus::Status setBuffer(const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
static bool isCharType(char vtype)
Returns whether or not the specified vtype is a char type.
bool checkInteger(const char *tag)
Check if the specified tag contains an integer.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
uint16_t getCigarLength()
Get the length of the BAM formatted CIGAR.
static bool isStringType(char vtype)
Returns whether or not the specified vtype is a string type.
void clearTags()
Clear the tags in this record.
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
HandlingType
This specifies how this class should respond to errors.
uint16_t getFlag()
Get the flag (FLAG).
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.
bool setReadName(const char *readName)
Set QNAME to the passed in name.
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.
@ EQUAL
Translate bases that match the reference to '='.
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
This class is used to track the status results of some methods in the BAM classes.
bool isValid(SamFileHeader &header)
Returns whether or not the record is valid, setting the status to indicate success or failure.
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
bool rmTag(const char *tag, char type)
Remove a tag.
int32_t get1BasedPosition()
Get the 1-based (SAM) leftmost position (POS) of the record.
int32_t get1BasedUnclippedEnd()
Returns the 1-based inclusive right-most position adjusted for clipped bases.
bool set1BasedPosition(int32_t position)
Set the leftmost position (POS) using the specified 1-based (SAM format) value.
int * getIntegerTag(const char *tag)
Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure...
void resetTagIter()
Reset the tag iterator to the beginning of the tags.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
bool getFloatTag(const char *tag, float &tagVal)
Get the float value for the specified tag.
int32_t get0BasedMatePosition()
Get the 0-based (BAM) leftmost mate/next fragment's position.
SamRecord()
Default Constructor.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Status
Return value enum for StatGenFile methods.
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
uint32_t getTagLength()
Returns the length of the BAM formatted tags.
bool setInsertSize(int32_t insertSize)
Sets the inferred insert size (ISIZE)/observed template length (TLEN).
bool setFlag(uint16_t flag)
Set the bitwise FLAG to the specified value.
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
bool rmTags(const char *tags)
Remove tags.
static bool isIntegerType(char vtype)
Returns whether or not the specified vtype is an integer type.
int32_t getReadLength()
Get the length of the read.
bool addTag(const char *tag, char vtype, const char *value)
Add the specified tag, vtype, value to the record.
int32_t getInsertSize()
Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
const char * getCigar()
Returns the SAM formatted CIGAR string.
int32_t getBlockSize()
Get the block size of the record (BAM format).
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
bool checkFloat(const char *tag)
Check if the specified tag contains a float.
int32_t get1BasedMatePosition()
Get the 1-based (SAM) leftmost mate/next fragment's position (PNEXT).
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
bool setReferenceName(SamFileHeader &header, const char *referenceName)
Set the reference sequence name (RNAME) to the specified name, using the header to determine the refe...
static bool isFloatType(char vtype)
Returns whether or not the specified vtype is a float type.
void resetRecord()
Reset the fields of the record to a default value.
const char * getMateReferenceNameOrEqual()
Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the ...
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
const String & getString(const char *tag)
Get the string value for the specified tag.
@ NONE
Leave the sequence as is.
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
bool shiftIndelsLeft()
Shift the indels (if any) to the left by updating the CIGAR.
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.
bool checkString(const char *tag)
Check if the specified tag contains a string.
const char * getMateReferenceName()
Get the mate/next fragment's reference sequence name (RNEXT).
Structure of a BAM record.
@ BASES
Translate '=' to the actual base.
bool setQuality(const char *quality)
Sets the quality (QUAL) to the specified SAM formatted quality string.
bool getNextSamTag(char *tag, char &vtype, void **value)
Get the next tag from the record.
bool set1BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value...
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.
bool getTagsString(const char *tags, String &returnString, char delim='\t')
Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE...