libStatGen Software 1
Loading...
Searching...
No Matches
Tabix Class Reference
Inheritance diagram for Tabix:
Collaboration diagram for Tabix:

Public Types

enum  Format { FORMAT_GENERIC = 0 , FORMAT_SAM = 1 , FORMAT_VCF = 2 }
 

Public Member Functions

void resetIndex ()
 Reset the member data for a new index file.
 
StatGenStatus::Status readIndex (const char *filename)
 
bool getStartPos (const char *refName, int32_t start, uint64_t &fileStartPos) const
 Get the starting file offset to look for the specified start position.
 
const char * getRefName (unsigned int indexNum) const
 Returns the reference name at the specified index, or throws an exception if the index is out of range.
 
int32_t getFormat () const
 
- Public Member Functions inherited from IndexBase
int32_t getNumRefs () const
 Get the number of references in this index.
 
bool getMinOffsetFromLinearIndex (int32_t refID, uint32_t position, uint64_t &minOffset) const
 

Additional Inherited Members

- Static Protected Member Functions inherited from IndexBase
static void getBinsForRegion (uint32_t start, uint32_t end, bool binMap[MAX_NUM_BINS+1])
 
- Protected Attributes inherited from IndexBase
int32_t n_ref
 
std::vector< Reference > myRefs
 
- Static Protected Attributes inherited from IndexBase
static const uint32_t MAX_NUM_BINS = 37450
 
static const uint32_t MAX_POSITION = 536870911
 
static const uint32_t LINEAR_INDEX_SHIFT = 14
 

Detailed Description

Definition at line 31 of file Tabix.h.

Member Enumeration Documentation

◆ Format

enum Tabix::Format

Definition at line 35 of file Tabix.h.

36 {
37 FORMAT_GENERIC = 0,
38 FORMAT_SAM = 1,
39 FORMAT_VCF = 2
40 };

Constructor & Destructor Documentation

◆ Tabix()

Tabix::Tabix ( )

Definition at line 21 of file Tabix.cpp.

22 : IndexBase(),
23 myChromNamesBuffer(NULL)
24{
25}

◆ ~Tabix()

Tabix::~Tabix ( )
virtual

Definition at line 28 of file Tabix.cpp.

29{
30 if(myChromNamesBuffer != NULL)
31 {
32 delete[] myChromNamesBuffer;
33 myChromNamesBuffer = NULL;
34 }
35}

Member Function Documentation

◆ getFormat()

int32_t Tabix::getFormat ( ) const
inline

Definition at line 64 of file Tabix.h.

64{ return myFormat.format; }

◆ getRefName()

const char * Tabix::getRefName ( unsigned int  indexNum) const

Returns the reference name at the specified index, or throws an exception if the index is out of range.

Definition at line 247 of file Tabix.cpp.

248{
249 if(indexNum >= myChromNamesVector.size())
250 {
251 String message = "ERROR: Out of range on Tabix::getRefName(";
252 message += indexNum;
253 message += ")";
254 throw(std::runtime_error(message.c_str()));
255 return(NULL);
256 }
257 return(myChromNamesVector[indexNum]);
258}

◆ getStartPos()

bool Tabix::getStartPos ( const char *  refName,
int32_t  start,
uint64_t &  fileStartPos 
) const

Get the starting file offset to look for the specified start position.

To cover an entire reference, set start to -1. To start at the beginning of the region, set start to 0 or -1; any negative start is treated as 0.

Definition at line 218 of file Tabix.cpp.

220{
221 // Look for the reference name in the list.
222 int refID = 0;
223 for(refID = 0; refID < n_ref; refID++)
224 {
225 if(strcmp(refName, myChromNamesVector[refID]) == 0)
226 {
227 // found the reference
228 break;
229 }
230 }
231 if(refID >= n_ref)
232 {
233 // Didn't find the refName, so return false.
234 return(false);
235 }
236
237 // Look up in the linear index.
238 if(start < 0)
239 {
240 // Negative index, so start at 0.
241 start = 0;
242 }
243 return(getMinOffsetFromLinearIndex(refID, start, fileStartPos));
244}

◆ readIndex()

StatGenStatus::Status Tabix::readIndex ( const char *  filename)
virtual
Parameters
filename: the Tabix index file to be read.
Returns
the status of the read.

Implements IndexBase.

Definition at line 52 of file Tabix.cpp.

53{
54 // Reset the index from anything that may previously be set.
55 resetIndex();
56
57 IFILE indexFile = ifopen(filename, "rb");
58
59 // Failed to open the index file.
60 if(indexFile == NULL)
61 {
63 }
64
65 // read the tabix index structure.
66
67 // Read the magic string.
68 char magic[4];
69 if(ifread(indexFile, magic, 4) != 4)
70 {
71 // Failed to read the magic
73 }
74
75 // If this is not an index file, set num references to 0.
76 if (magic[0] != 'T' || magic[1] != 'B' || magic[2] != 'I' || magic[3] != 1)
77 {
78 // Not a Tabix Index file.
80 }
81
82 // It is a tabix index file.
83 // Read the number of reference sequences.
84 if(ifread(indexFile, &n_ref, 4) != 4)
85 {
86 // Failed to read.
88 }
89
90 // Size the references.
91 myRefs.resize(n_ref);
92
93 // Read the Format configuration.
94 if(ifread(indexFile, &myFormat, sizeof(myFormat)) != sizeof(myFormat))
95 {
96 // Failed to read.
98 }
99
100 // Read the length of the chromosome names.
101 uint32_t l_nm;
102
103 if(ifread(indexFile, &l_nm, sizeof(l_nm)) != sizeof(l_nm))
104 {
105 // Failed to read.
107 }
108
109 // Read the chromosome names.
110 myChromNamesBuffer = new char[l_nm];
111 if(ifread(indexFile, myChromNamesBuffer, l_nm) != l_nm)
112 {
114 }
115 myChromNamesVector.resize(n_ref);
116
117 // Parse out the chromosome names.
118 bool prevNull = true;
119 int chromIndex = 0;
120 for(uint32_t i = 0; i < l_nm; i++)
121 {
122 if(chromIndex >= n_ref)
123 {
124 // already set the pointer for the last chromosome name,
125 // so stop looping.
126 break;
127 }
128 if(prevNull == true)
129 {
130 myChromNamesVector[chromIndex++] = myChromNamesBuffer + i;
131 prevNull = false;
132 }
133 if(myChromNamesBuffer[i] == '\0')
134 {
135 prevNull = true;
136 }
137 }
138
139 for(int refIndex = 0; refIndex < n_ref; refIndex++)
140 {
141 // Read each reference.
142 Reference* ref = &(myRefs[refIndex]);
143
144 // Read the number of bins.
145 if(ifread(indexFile, &(ref->n_bin), 4) != 4)
146 {
147 // Failed to read the number of bins.
148 // Return failure.
150 }
151
152 // Resize the bins.
153 ref->bins.resize(ref->n_bin + 1);
154
155 // Read each bin.
156 for(int binIndex = 0; binIndex < ref->n_bin; binIndex++)
157 {
158 uint32_t binNumber;
159
160 // Read in the bin number.
161 if(ifread(indexFile, &(binNumber), 4) != 4)
162 {
163 // Failed to read the bin number.
164 // Return failure.
166 }
167
168 // Add the bin to the reference and get the
169 // pointer back so the values can be set in it.
170 Bin* binPtr = &(ref->bins[binIndex]);
171 binPtr->bin = binNumber;
172
173 // Read in the number of chunks.
174 if(ifread(indexFile, &(binPtr->n_chunk), 4) != 4)
175 {
176 // Failed to read number of chunks.
177 // Return failure.
179 }
180
181 // Read in the chunks.
182 // Allocate space for the chunks.
183 uint32_t sizeOfChunkList = binPtr->n_chunk * sizeof(Chunk);
184 binPtr->chunks = (Chunk*)malloc(sizeOfChunkList);
185 if(ifread(indexFile, binPtr->chunks, sizeOfChunkList) != sizeOfChunkList)
186 {
187 // Failed to read the chunks.
188 // Return failure.
190 }
191 }
192
193 // Read the number of intervals.
194 if(ifread(indexFile, &(ref->n_intv), 4) != 4)
195 {
196 // Failed to read, set to 0.
197 ref->n_intv = 0;
198 // Return failure.
200 }
201
202 // Allocate space for the intervals and read them.
203 uint32_t linearIndexSize = ref->n_intv * sizeof(uint64_t);
204 ref->ioffsets = (uint64_t*)malloc(linearIndexSize);
205 if(ifread(indexFile, ref->ioffsets, linearIndexSize) != linearIndexSize)
206 {
207 // Failed to read the linear index.
208 // Return failure.
210 }
211 }
212
213 // Successfully read the bam index file.
215}
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition InputFile.h:562
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition InputFile.h:600
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition InputFile.h:37
@ SUCCESS
method completed successfully.
@ FAIL_IO
method failed due to an I/O issue.
@ FAIL_PARSE
failed to parse a record/header - invalid format.
void resetIndex()
Reset the member data for a new index file.
Definition Tabix.cpp:39

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_PARSE, ifopen(), ifread(), resetIndex(), and StatGenStatus::SUCCESS.

◆ resetIndex()

void Tabix::resetIndex ( )
virtual

Reset the member data for a new index file.

Reimplemented from IndexBase.

Definition at line 39 of file Tabix.cpp.

40{
42 if(myChromNamesBuffer != NULL)
43 {
44 delete[] myChromNamesBuffer;
45 myChromNamesBuffer = NULL;
46 }
47 myChromNamesVector.clear();
48}
virtual void resetIndex()
Reset the member data for a new index file.

References IndexBase::resetIndex().

Referenced by readIndex().


The documentation for this class was generated from the following files: