libStatGen Software  1
glfHandler.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "glfHandler.h"
19 #include "BaseQualityHelper.h"
20 
21 char glfHandler::translateBase[16] = {0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0};
22 char glfHandler::backTranslateBase[5] = { 15, 1, 2, 4, 8 };
23 unsigned char glfHandler::nullLogLikelihoods[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
24 double glfHandler::nullLikelihoods[10] = {1., 1., 1., 1., 1., 1., 1., 1., 1., 1.};
25 
26 glfHandler::glfHandler()
27 {
28  isStub = true;
29  sections = 0;
30  currentSection = 0;
31  maxPosition = position = endOfSection = 0;
32 }
33 
34 glfHandler::~glfHandler()
35 {
36  // Not safe to close the file here in case a copy of the file was generated
37  // if (isOpen())
38  // Close();
39 }
40 
41 bool glfHandler::Open(const String & filename)
42 {
43  isStub = false;
44  handle = ifopen(filename, "rb");
45 
46  if (handle == NULL)
47  {
48  isStub = true;
49  return false;
50  }
51 
52  if (!ReadHeader())
53  ifclose(handle);
54 
55  endOfSection = true;
56 
57  return handle != NULL;
58 }
59 
60 void glfHandler::OpenStub()
61 {
62  isStub = true;
63  handle = NULL;
64 
65  endOfSection = true;
66  data.recordType = 0;
67  maxPosition = 1999999999;
68  position = maxPosition + 1;
69 }
70 
71 bool glfHandler::Create(const String & filename)
72 {
73  isStub = false;
74  // glf is in BGZF format.
75  handle = ifopen(filename, "wb", InputFile::BGZF);
76 
77  if (handle == NULL)
78  {
79  isStub = true;
80  return false;
81  }
82 
83  WriteHeader();
84 
85  return handle != NULL;
86 }
87 
88 bool glfHandler::isOpen()
89 {
90  return handle != NULL;
91 }
92 
93 bool glfHandler::ReadHeader()
94 {
95  if (isStub)
96  return true;
97 
98  if (handle == NULL)
99  return false;
100 
101  char magicNumber[4];
102 
103  if (ifread(handle, magicNumber, 4) != 4)
104  {
105  errorMsg = "unexpected end of file";
106  return false;
107  }
108 
109  if (magicNumber[0] != 'G' || magicNumber[1] != 'L' || magicNumber[2] != 'F')
110  {
111  errorMsg = "invalid format";
112  return false;
113  }
114 
115  if (magicNumber[3] != 3)
116  {
117  errorMsg = "unsupported version";
118  return false;
119  }
120 
121  unsigned int headerLength = 0;
122 
123  if (ifread(handle, &headerLength, 4) != 4)
124  {
125  errorMsg = "unexpected end of file";
126  return false;
127  }
128 
129  if (headerLength > 1024 * 1024)
130  {
131  errorMsg = "header too large -- bailing";
132  return false;
133  }
134 
135  header.SetLength(headerLength + 1);
136  header[headerLength] = 0;
137 
138  if (headerLength && ifread(handle, header.LockBuffer(headerLength + 1), headerLength) != headerLength)
139  {
140  errorMsg = "unexpected end of file";
141  return false;
142  }
143 
144  return true;
145 }
146 
147 void glfHandler::Close()
148 {
149  if (isOpen())
150  ifclose(handle);
151 }
152 
153 void glfHandler::Rewind()
154 {
155  if (isOpen())
156  {
157  ifrewind(handle);
158 
159  if (!ReadHeader())
160  ifclose(handle);
161 
162  endOfSection = true;
163  }
164 }
165 
166 bool glfHandler::NextSection()
167 {
168  if (isStub)
169  {
170  endOfSection = true;
171  data.recordType = 0;
172  maxPosition = 1999999999;
173  position = maxPosition + 1;
174  return true;
175  }
176 
177  while (!endOfSection && !ifeof(handle))
178  NextEntry();
179 
180  endOfSection = false;
181 
182  int labelLength = 0;
183 
184  currentSection++;
185  position = 0;
186  if (ifread(handle, &labelLength, sizeof(int)) == sizeof(int))
187  {
188  ifread(handle, label.LockBuffer(labelLength+1), labelLength * sizeof(char));
189  label.UnlockBuffer();
190 
191  maxPosition = 0;
192  ifread(handle, &maxPosition, sizeof(int));
193 
194  return ((maxPosition > 0) && !ifeof(handle));
195  }
196 
197  return false;
198 }
199 
200 bool glfHandler::NextBaseEntry()
201 {
202  bool result = true;
203 
204  do
205  {
206  result = NextEntry();
207  }
208  while (result && data.recordType == 2);
209 
210  return result;
211 }
212 
213 
214 bool glfHandler::NextEntry()
215 {
216  if (isStub)
217  return false;
218 
219  // Read record type
220  if (endOfSection || (ifread(handle, &data, 1) != 1))
221  {
222  endOfSection = true;
223  data.recordType = 0;
224  position = maxPosition + 1;
225  return false;
226  }
227 
228  // printf("%d/%d\n", data.recordType, data.refBase);
229 
230  if (position > maxPosition)
231  return true;
232 
233  switch (data.recordType)
234  {
235  case 0 :
236  endOfSection = true;
237  position = maxPosition + 1;
238  return true;
239  case 1 :
240  if (ifread(handle,((char *) &data) + 1, sizeof(data) - 1) == sizeof(data) - 1)
241  {
242  data.refBase = translateBase[data.refBase];
243 
244  for (int i = 0; i < 10; i++)
245  likelihoods[i] = bQualityConvertor.toDouble(data.lk[i]);
246 
247  position = position + data.offset;
248  return true;
249  }
250 
251  // Premature end of file
252  data.recordType = 0;
253  position = maxPosition + 1;
254  return false;
255  case 2 :
256  while (ifread(handle, ((char *) &data) + 1, sizeof(data) - 4) == sizeof(data) - 4)
257  {
258  data.refBase = translateBase[data.refBase];
259 
260  for (int i = 0; i < 3; i++)
261  likelihoods[i] = bQualityConvertor.toDouble(data.indel.lk[i]);
262 
263  position = position + data.offset;
264 
265  indelSequence[0].SetLength(abs(data.indel.length[0]) + 1);
266  indelSequence[0][abs(data.indel.length[0])] = 0;
267  if (ifread(handle, indelSequence[0].LockBuffer(), abs(data.indel.length[0])) != (unsigned int) abs(data.indel.length[0]))
268  break;
269 
270  indelSequence[1].SetLength(abs(data.indel.length[1]) + 1);
271  indelSequence[1][abs(data.indel.length[1])] = 0;
272  if (ifread(handle, indelSequence[1].LockBuffer(), abs(data.indel.length[1])) != (unsigned int) abs(data.indel.length[1]))
273  break;
274 
275  return true;
276  }
277 
278  // Premature end of file
279  data.recordType = 0;
280  position = maxPosition + 1;
281  return false;
282  }
283 
284  return false;
285 }
286 
287 glfEntry & glfEntry::operator = (glfEntry & rhs)
288 {
289  refBase = rhs.refBase;
290  recordType = rhs.recordType;
291  offset = rhs.offset;
292  mapQuality = rhs.mapQuality;
293 
294  for (int i = 0; i < 10; i++)
295  lk[i] = rhs.lk[i];
296 
297  minLLK = rhs.minLLK;
298  depth = rhs.depth;
299 
300  return * this;
301 }
302 
303 const double * glfHandler::GetLikelihoods(int pos)
304 {
305  if (pos == position)
306  return likelihoods;
307 
308  return nullLikelihoods;
309 }
310 
311 const unsigned char * glfHandler::GetLogLikelihoods(int pos)
312 {
313  if (pos == position)
314  return data.lk;
315 
316  return nullLogLikelihoods;
317 }
318 
319 char glfHandler::GetReference(int pos, char defaultBase)
320 {
321  if (pos == position)
322  return data.refBase;
323 
324  return defaultBase;
325 }
326 
327 int glfHandler::GetDepth(int pos)
328 {
329  if (pos == position)
330  return data.depth;
331 
332  return 0;
333 }
334 
335 int glfHandler::GetMapQuality(int pos)
336 {
337  if (pos == position)
338  return data.mapQuality;
339 
340  return 0;
341 }
342 
343 void glfHandler::WriteHeader(const String & headerText)
344 {
345  char magicNumber[4] = {'G', 'L', 'F', 3};
346 
347  ifwrite(handle, magicNumber, 4);
348 
349  unsigned int headerLength = headerText.Length();
350 
351  ifwrite(handle, &headerLength, 4);
352  ifwrite(handle, (void *)(const char *) headerText, headerLength);
353 }
354 
355 void glfHandler::BeginSection(const String & sectionLabel, int sectionLength)
356 {
357  int labelLength = sectionLabel.Length() + 1;
358 
359  ifwrite(handle, &labelLength, sizeof(int));
360  ifwrite(handle, (void *)(const char *) sectionLabel, labelLength);
361  ifwrite(handle, &sectionLength, sizeof(int));
362 
363  label = sectionLabel;
364  maxPosition = sectionLength;
365 }
366 
367 void glfHandler::EndSection()
368 {
369  char marker = 0;
370 
371  ifwrite(handle, &marker, sizeof(char));
372 }
373 
374 void glfHandler::WriteEntry(int outputPosition)
375 {
376  data.offset = outputPosition - position;
377  position = outputPosition;
378 
379  switch (data.recordType)
380  {
381  case 0 :
382  EndSection();
383  return;
384  case 1 :
385  data.refBase = backTranslateBase[data.refBase];
386  ifwrite(handle, &data, sizeof(data));
387  data.refBase = translateBase[data.refBase];
388  return;
389  case 2 :
390  data.refBase = backTranslateBase[data.refBase];
391  ifwrite(handle, &data, sizeof(data) - 3);
392  data.refBase = translateBase[data.refBase];
393 
394  ifwrite(handle, (void *)(const char *) indelSequence[0], abs(data.indel.length[0]));
395  ifwrite(handle, (void *)(const char *) indelSequence[1], abs(data.indel.length[1]));
396  return;
397  }
398 }
399 
glfEntry::offset
unsigned int offset
offset of this record from the previous one, in bases
Definition: glfHandler.h:48
String
Definition: StringBasics.h:38
ifopen
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition: InputFile.h:562
glfEntry::depth
unsigned depth
log10 minimum likelihood * 10 and the number of mapped reads
Definition: glfHandler.h:51
ifeof
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
ifrewind
void ifrewind(IFILE file)
Reset to the beginning of the file (cannot be done for stdin/stdout).
Definition: InputFile.h:642
glfEntry::lk
unsigned char lk[10]
log10 likelihood ratio * 10 for genotypes AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
Definition: glfHandler.h:59
ifread
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
glfEntry::refBase
unsigned char refBase
"XACMGRSVTWYHKDBN"[ref_base] gives the reference base
Definition: glfHandler.h:45
glfEntry::mapQuality
unsigned char mapQuality
root mean squared maximum mapping quality for overlapping reads
Definition: glfHandler.h:54
ifwrite
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
ifclose
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580
InputFile::BGZF
@ BGZF
bgzf file.
Definition: InputFile.h:48
glfEntry
Definition: glfHandler.h:42