libStatGen Software 1
Loading...
Searching...
No Matches
IndexBase.h
1/*
2 * Copyright (C) 2011-2012 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __INDEX_BASE_H__
19#define __INDEX_BASE_H__
20
21#include <stdint.h>
22#include <vector>
23#include <map>
24#include <stdlib.h>
25
26#include "InputFile.h"
27#include "StatGenStatus.h"
28
29
/// A pair of offsets marking where a chunk starts and where it ends.
/// Chunks are ordered solely by their starting offset.
class Chunk
{
public:
    /// Offset of the start of the chunk.
    uint64_t chunk_beg;
    /// Offset of the end of the chunk.
    uint64_t chunk_end;

    /// Largest value a chunk offset can hold.
    static const uint64_t MAX_CHUNK_VALUE = 0xFFFFFFFFFFFFFFFFULL;

    /// Strict ordering on the starting offset only; chunk_end does not
    /// take part in the comparison.
    /// \param rhs the chunk to compare against.
    /// \return true if this chunk starts before rhs.
    bool operator< (const Chunk& rhs) const
    {
        return(rhs.chunk_beg > chunk_beg);
    }
};
43
44
45// This class contains chunks that are sorted by the beginning position.
46// This class hides how the chunks are actually stored (map, list, etc.),
47// so they can be interchanged.
49{
50public:
51 // Returns the first chunk in the list and removes it.
52 Chunk pop();
53 bool insert(const Chunk& chunkToInsert);
54 void clear();
55 bool empty();
56 bool mergeOverlapping();
57
58private:
59 std::map<uint64_t, Chunk> chunkList;
60};
61
63{
64public:
65
66 IndexBase();
67 virtual ~IndexBase();
68
69 /// Reset the member data for a new index file.
70 virtual void resetIndex();
71
72 /// Read & parse the specified index file.
73 /// \param filename the bam index file to be read.
74 /// \return the status of the read.
75 virtual StatGenStatus::Status readIndex(const char* filename) = 0;
76
77 /// Get the number of references in this index.
78 /// \return number of references
79 int32_t getNumRefs() const;
80
81 // Returns the minimum offset of records that cross the 16K block that
82 // contains the specified position for the given reference id.
83 bool getMinOffsetFromLinearIndex(int32_t refID, uint32_t position,
84 uint64_t& minOffset) const;
85
86protected:
87 const static uint32_t MAX_NUM_BINS = 37450; // per specs, at most 37450 bins
88
89 // Maximum allowed position, inclusive (2^29 - 1, i.e. 512M - 1).
90 // NOTE: CSI index may not have this same max position.
91 const static uint32_t MAX_POSITION = 536870911;
92
93 // Number of bits in 1 linear index - how much to shift a position by
94 // to determine which offset into the linear index to look for it.
95 const static uint32_t LINEAR_INDEX_SHIFT = 14;
96
97 class Bin
98 {
99 public:
100 Bin(){chunks = NULL; reset();}
101 ~Bin() {reset();}
102 void reset()
103 {
104 if(chunks != NULL)
105 {
106 free(chunks);
107 chunks = NULL;
108 }
109 n_chunk = 0;
110 bin = NOT_USED_BIN;
111 }
112 uint32_t bin; // The bin id.
113 int32_t n_chunk; // The number of chunks.
114 Chunk* chunks; // The chunks for this bin.
115 static const uint32_t NOT_USED_BIN = 0xFFFFFFFF;
116 };
117
119 {
120 // Add one to the max since there may now be an extra bin containing
121 // the mapped/unmapped counts.
122 public:
123 static const int32_t UNKNOWN_MAP_INFO = -1;
124 Reference(){ioffsets = NULL; reset();}
125 ~Reference(){reset();}
126 void reset()
127 {
128 bins.clear();
129 if(ioffsets != NULL)
130 {
131 free(ioffsets);
132 ioffsets = NULL;
133 }
134 n_bin = 0;
135 n_intv = 0;
136 minChunkOffset = UNSET_MIN_CHUNK_OFFSET;
137 maxChunkOffset = 0;
138 n_mapped = UNKNOWN_MAP_INFO;
139 n_unmapped = UNKNOWN_MAP_INFO;
140 }
141 int32_t n_bin; // The number of bins.
142 int32_t n_intv; // Number of intervals.
143 std::vector<Bin> bins; // The bins for this reference.
144 uint64_t* ioffsets; // Offsets of intervals first alignments
145 uint64_t minChunkOffset;
146 uint64_t maxChunkOffset;
147 int32_t n_mapped; // Number of mapped reads.
148 int32_t n_unmapped; // Number of unmapped reads.
149
150 static const uint64_t UNSET_MIN_CHUNK_OFFSET = 0xFFFFFFFFFFFFFFFFULL;
151 };
152
153 // Set bins in the region to 1 and all other bins to 0.
154 // start is inclusive, end is exclusive.
155 static void getBinsForRegion(uint32_t start, uint32_t end, bool binMap[MAX_NUM_BINS+1]);
156
157 // Number of reference sequences.
158 int32_t n_ref;
159
160 // The references.
161 std::vector<Reference> myRefs;
162};
163
164
165#endif
int32_t getNumRefs() const
Get the number of references in this index.
virtual StatGenStatus::Status readIndex(const char *filename)=0
virtual void resetIndex()
Reset the member data for a new index file.
Status
Return value enum for StatGenFile methods.