libStatGen Software 1
Loading...
Searching...
No Matches
GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex > Class Template Reference

Public Member Functions

 GreedyTupleAligner (Weight &wt)
 
int MatchTuple (const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
 Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.
 
void Align (QueryType query, int queryLength, ReferenceType reference, ReferenceIndex searchStartIndex, int searchSize, CigarRoller &cigarRoller, ReferenceIndex &matchPosition)
 Core local alignment algorithm.
 

Detailed Description

template<typename QueryType, typename ReferenceType, typename ReferenceIndex>
class GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >

Definition at line 60 of file GreedyTupleAligner.h.

Constructor & Destructor Documentation

◆ GreedyTupleAligner()

template<typename QueryType , typename ReferenceType , typename ReferenceIndex >
GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::GreedyTupleAligner ( Weight wt)
inline

Definition at line 63 of file GreedyTupleAligner.h.

63 : weight(wt)
64 {/* */}

Member Function Documentation

◆ Align()

template<typename QueryType , typename ReferenceType , typename ReferenceIndex >
void GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align ( QueryType  query,
int  queryLength,
ReferenceType  reference,
ReferenceIndex  searchStartIndex,
int  searchSize,
CigarRoller cigarRoller,
ReferenceIndex &  matchPosition 
)
inline

Core local alignment algorithm.

Parameters
queryinput query
queryLengthlength of query
referencereference genome
searchStartIndexmatching starts here
searchSizehow far we will search
cigarRollerstore alignment results here
matchPositionstore match position

Definition at line 159 of file GreedyTupleAligner.h.

167 {
168 // Algorithm:
169 // finished align? (should we try different align position?)
170 // if not, try next tuple
171 // is the tuple aligned?
172 // yes, extend to previous, mark unmatched part mismatch or gap
173 // extend to next matched part
174 int r1 = 0; // a start index: reference starting from r1 (inclusive) will be used
175 int queryMatchCount = 0; // query matched # of bases
176 int q1 = 0; // to align
177 int pos = -1;
178 int lastR1 = -1; // index: record last
179
180 cigarRoller.clear();
181 matchPosition = -1;
182
183 while (queryMatchCount < queryLength)
184 {
185 if (r1 == searchSize - 1) // touched ref right boundary
186 {
187 cigarRoller.Add(CigarRoller::softClip, queryLength-queryMatchCount);
188 break;
189 }
190 if (queryLength - q1 < tupleSize)
191 {
192 // XXX this needs to do something more sane
193 // printf("some bases left!\n");
194 // a simple fix: treat all left-over bases as mismatches/matches
195 cigarRoller.Add(CigarRoller::mismatch, queryLength - queryMatchCount);
196 break;
197 }
198 int mismatch = 0;
199 int matchedLen = 0;
200 if ((pos = MatchTuple(query+q1, queryLength-q1, reference, searchStartIndex + r1, searchSize - r1, matchedLen, mismatch)) // found match position for tuple
201
202 >= 0)
203 {
204 // found match position for tuple
205
206 if (lastR1<0)
207 matchPosition = pos;
208
209 //
210 // deal with left
211 //
212 if (lastR1>=0) // have previously aligned part of the query to the reference genome yet
213 {
214 if (pos > 0)
215 {
216 cigarRoller.Add(CigarRoller::del, pos);
217 }
218 }
219 else
220 {
221 lastR1 = pos;
222 }
223
224 r1 += pos;
225 r1 += matchedLen;
226 q1 += matchedLen;
227
228 //
229 // deal with right
230 //
231 cigarRoller.Add(CigarRoller::match, matchedLen);
232 queryMatchCount = q1;
233 lastR1 = r1;
234
235 continue;
236 } // end if
237
238 //
239 // try insertion
240 // maximum insert ? say 2
241 //
242 for (int i = 1; i < queryLength - q1 - tupleSize; i++)
243 {
244 int mismatch = 0;
245 int matchedLen = 0;
246 // check reference genome broundary
247 if (searchStartIndex + r1 >= reference.getNumberBases())
248 return;
249 if ((pos = MatchTuple(query+q1 + i ,
250 queryLength - q1 -i ,
251 reference,
252 searchStartIndex + r1,
253 searchSize - r1,
254 matchedLen,
255 mismatch)) // found match position for tuple
256 >= 0)
257 {
258 if (matchPosition < 0)
259 matchPosition = pos + q1 + i ;
260 }
261 queryMatchCount += i;
262 q1 += i;
263 cigarRoller.Add(CigarRoller::insert, i);
264
265 lastR1 = r1 + pos;
266 r1 += pos + tupleSize;
267 q1 += tupleSize;
268
269 // deal with right
270 while (searchStartIndex + r1 < reference.getNumberBases()
271 && query[q1]==reference[searchStartIndex + r1]
272 && q1 < queryLength)
273 {
274 r1++;
275 q1++;
276 }
277 if (q1 < queryLength)
278 {
279 cigarRoller.Add(CigarRoller::match, q1 - queryMatchCount);
280 queryMatchCount = q1;
281 }
282 else
283 {
284 cigarRoller.Add(CigarRoller::match, queryLength - queryMatchCount);
285 queryMatchCount = queryLength ;
286 break ;
287 }
288 }
289
290 r1++;
291 q1++;
292
293 // try next
294 } // end while (queryMatchCount < queryLength)
295 }
void Add(Operation operation, int count)
Append the specified operation with the specified count to this object.
void clear()
Clear this object so that it has no Cigar Operations.
@ del
deletion from the reference (the reference contains bases that have no corresponding base in the quer...
Definition Cigar.h:92
@ mismatch
mismatch operation. Associated with CIGAR Operation "M"
Definition Cigar.h:90
@ match
match/mismatch operation. Associated with CIGAR Operation "M"
Definition Cigar.h:89
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition Cigar.h:91
@ softClip
Soft clip on the read (clipped sequence present in the query sequence, but not in reference)....
Definition Cigar.h:94
int MatchTuple(const QueryType query, const int queryLength, const ReferenceType reference, const ReferenceIndex searchStartIndex, const int searchSize, int &matchedLength, int &mismatch)
Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to ...

References CigarRoller::Add(), CigarRoller::clear(), Cigar::del, Cigar::insert, Cigar::match, GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple(), Cigar::mismatch, and Cigar::softClip.

◆ MatchTuple()

template<typename QueryType , typename ReferenceType , typename ReferenceIndex >
int GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::MatchTuple ( const QueryType  query,
const int  queryLength,
const ReferenceType  reference,
const ReferenceIndex  searchStartIndex,
const int  searchSize,
int &  matchedLength,
int &  mismatch 
)
inline

Match 'query' to the 'reference' from 'searchStartIndex' up to 'searchSize', store matched length to 'matchedLength' and number of mismatch to 'mismatch'.

Parameters
queryinput query
queryLengthlength of query
referencereference sequence
searchStartIndexthe positino where search starts
searchSizethe total length in reference sequence that will be examine
matchedLengthstore how many bases are matched
mismatchstore how many bases are mismatched
Returns
-1 for unsuccess return

Definition at line 79 of file GreedyTupleAligner.h.

87 {
88 // now use naive search,
89 // TODO: will incorportate KMP serach later
90 // TODO: adjust tolerance of mismatches
91 const int MAX_MISMATCH=2;
92 int bestPos = 0, bestMismatch = queryLength, bestMatchedLength = 0, bestScore=-1;
93
94#if defined(DEBUG_GREEDY_ALIGNER)
95 cout << "searchStartIndex == " << searchStartIndex << ", searchSize == " << searchSize << std::endl;
96#endif
97 // here i is the matching position (inclusive)
98 // j is the matched length
99 for (int i = 0; i <= searchSize - tupleSize; i++)
100 {
101 int j = 0;
102 mismatch = 0;
103 while (j < queryLength)
104 {
105 if (searchStartIndex + i + j >= reference.getNumberBases())
106 break;
107 if (query[j] != reference[searchStartIndex + i + j])
108 {
109 mismatch++;
110 if (mismatch >= MAX_MISMATCH)
111 break;
112 }
113 j++;
114 }
115
116 if (j>0 && (j==queryLength)) j--;
117
118 while (searchStartIndex +i +j < reference.getNumberBases()
119 && ((j+1) > mismatch)
120 && mismatch>0
121 && query[j] != reference[searchStartIndex + i+j])
122 {
123 // if pattern matching goes beyong the preset mismatch cutoff,
124 // we will have to go backwards
125 j--;
126 mismatch--;
127 }
128
129 int score = j - mismatch;
130
131 if (score > bestScore)
132 {
133 bestPos = i;
134 bestScore = score;
135 bestMismatch = mismatch;
136 bestMatchedLength = j+1;
137 }
138 }
139
140 if (bestScore > 0)
141 {
142 mismatch = bestMismatch;
143 matchedLength = bestMatchedLength;
144 return bestPos;
145 }
146 return -1;
147 }

Referenced by GreedyTupleAligner< QueryType, ReferenceType, ReferenceIndex >::Align().


The documentation for this class was generated from the following file: