libStatGen Software  1
PedigreeFamily.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "Pedigree.h"
19 #include "Constant.h"
20 #include "MathConstant.h"
21 #include "Error.h"
22 
23 #include <stdlib.h>
24 #include <ctype.h>
25 #include <string.h>
26 #include <limits.h>
27 
28 Family::Family(Pedigree & pedigree, int _first, int _last, int _serial) :
29  ped(pedigree)
30 {
31  serial = _serial;
32  first = _first;
33  last = _last;
34  count = last - first + 1;
35  path = new int [count];
36  famid = ped[first].famid;
37 
38  founders = mzTwins = 0;
39 
40  for (int i=first; i<=last; i++)
41  if (ped[i].isFounder())
42  {
43  ped[i].traverse = founders;
44  path[founders++] = ped[i].serial;
45  }
46  else
47  {
48  ped[i].traverse = -1;
49  if (ped[i].isMzTwin(ped[i]))
50  for (int j = first; j < i; j++)
51  if (ped[i].isMzTwin(ped[j]))
52  {
53  mzTwins++;
54  break;
55  }
56  }
57 
58  nonFounders = count - founders;
59  generations = nonFounders == 0 ? 1 : 2;
60 
61  int next = founders;
62  while (next < count)
63  {
64  bool check = false;
65 
66  // Create traversal where path ancestors precede their offspring
67  for (int i=first; i<=last; i++)
68  if (ped[i].traverse == -1)
69  {
70  int fatherSerial = ped[i].father->traverse;
71  int motherSerial = ped[i].mother->traverse;
72 
73  if (fatherSerial >= 0 && motherSerial >= 0)
74  {
75  check = true;
76 
77  ped[i].traverse = next;
78  path[next++] = i;
79 
80  if (fatherSerial >= founders || motherSerial >= founders)
81  generations = 3;
82 
83  // If this individual is part of a set of MZ twins
84  if (ped[i].zygosity & 1)
85  for (int j = 0; j < ped[i].sibCount; j++)
86  {
87  Person & sib = *ped[i].sibs[j];
88 
89  // Insert all co-twins at the same position in traversal
90  // order
91  if (sib.traverse == -1 && ped[i].zygosity == sib.zygosity)
92  {
93  sib.traverse = next;
94  path[next++] = sib.serial;
95  }
96  }
97  }
98  }
99 
100  if (!check) ShowInvalidCycles();
101  }
102 }
103 
104 Family::~Family()
105 {
106  delete [] path;
107 }
108 
109 void Family::ShowInvalidCycles()
110 {
111  // Try and identify key individuals responsible for
112  // pedigree mess-up ... when this function is called
113  // pedigree has been traversed top-down and individuals
114  // that are correctly specified have IDs of >= 0.
115 
116  // This routine traverses the pedigree bottom up to
117  // identify a subset of individuals likely to be causing
118  // the problem
119  IntArray descendants(ped.count);
120  descendants.Zero();
121 
122  for (int i = first; i <= last; i++)
123  if (ped[i].traverse == -1)
124  {
125  descendants[ped[i].father->serial]++;
126  descendants[ped[i].mother->serial]++;
127  }
128 
129  IntArray stack;
130 
131  for (int i = first; i <= last; i++)
132  if (ped[i].traverse == -1 && descendants[i] == 0)
133  {
134  stack.Push(i);
135 
136  do
137  {
138  int j = stack.Pop();
139 
140  if (ped[j].traverse != -1) continue;
141 
142  ped[j].traverse = 9999;
143 
144  if (--descendants[ped[j].father->serial] == 0)
145  stack.Push(ped[j].father->serial);
146  if (--descendants[ped[j].mother->serial] == 0)
147  stack.Push(ped[j].mother->serial);
148  }
149  while (stack.Length());
150  }
151 
152  printf("The structure of family %s requires\n"
153  "an individual to be his own ancestor.\n\n"
154  "To identify the problem(s), examine the\n"
155  "following key individuals:\n\n",
156  (const char *) famid);
157 
158  for (int i = first; i <= last; i++)
159  if (ped[i].traverse == -1)
160  printf("Problem Person: %s\n", (const char *) ped[i].pid);
161 
162  error("Invalid pedigree structure.");
163 }
164 
165 int Family::ConnectedGroups(IntArray * groupMembership)
166 {
167  IntArray groups(count);
168 
169  // Use the quick union algorithm to identify connected groups
170  groups.SetSequence(0, 1);
171  for (int i = count - 1; i >= founders; i--)
172  {
173  // Lookup parents
174  int group0 = i;
175  int group1 = ped[path[i]].father->traverse;
176  int group2 = ped[path[i]].mother->traverse;
177 
178  // Identify their corresponding groupings
179  while (groups[group0] != group0) group0 = groups[group0];
180  while (groups[group1] != group1) group1 = groups[group1];
181  while (groups[group2] != group2) group2 = groups[group2];
182 
183  int group = group1 < group2 ? group1 : group2;
184  if (group0 < group) group = group0;
185 
186  groups[group0] = groups[group1] = groups[group2] = group;
187  }
188 
189  // Count groupings
190  int groupCount = 0;
191  for (int i = 0; i < founders; i++)
192  if (groups[i] == i)
193  groupCount++;
194 
195  if (groupMembership == NULL)
196  return groupCount;
197 
198  // Flatten tree so all items point to root
199  for (int i = 1; i < count; i++)
200  groups[i] = groups[groups[i]];
201 
202  // Update group membership info
203  int group = 0;
204  groupMembership->Dimension(count);
205  for (int i = 0; i < count; i++)
206  if (groups[i] == i)
207  (*groupMembership)[i] = ++group;
208  else
209  (*groupMembership)[i] = (*groupMembership)[groups[i]];
210 
211 #if 0
212  // This stretch of code outputs family structure and group membership
213  // And should usually be commented out!
214  for (int j = first; j <= last; j++)
215  printf("%s %s %s %s %d %d\n",
216  (const char *) famid, (const char *) ped[j].pid,
217  (const char *) ped[j].fatid, (const char *) ped[j].motid,
218  ped[j].sex, groups[ped[j].traverse]);
219 #endif
220 
221  return groupCount;
222 }
223 
224 /*
225 int Family::ConnectedGroups(IntArray * groupMembership)
226  {
227  IntArray * stack = new IntArray[count];
228  IntArray groups(count);
229 
230  groups.Zero();
231 
232  int group = 0;
233  int seed = count - 1;
234 
235  // Search for connected sets of individuals until everyone is accounted for
236  while (true)
237  {
238  while ((seed >= 0) && (groups[seed] != 0))
239  seed--;
240 
241  if (seed == -1)
242  break;
243 
244  Mark(seed, ++group, stack, groups);
245 
246  for (int j = seed; j >= founders; j--)
247  if (groups[j] == 0)
248  {
249  int fat_j = ped[path[j]].father->traverse;
250  int mot_j = ped[path[j]].mother->traverse;
251 
252  if (groups[fat_j] == group || groups[mot_j] == group)
253  Mark(j, group, stack, groups);
254  else
255  stack[mot_j].Push(j),
256  stack[fat_j].Push(j);
257  }
258 
259  for (int j = 0; j < count; j++)
260  stack[j].Clear();
261  }
262 
263  if (groupMembership != NULL)
264  (*groupMembership) = groups;
265 
266  // This stretch of code outputs family structure and group membership
267  // And should usually be commented out!
268 #if 0
269  for (int j = first; j <= last; j++)
270  printf("%s %s %s %s %d %d\n",
271  (const char *) famid, (const char *) ped[j].pid,
272  (const char *) ped[j].fatid, (const char *) ped[j].motid,
273  ped[j].sex, groups[ped[j].traverse]);
274 #endif
275 
276  delete [] stack;
277 
278  return group;
279  }
280 
281 void Family::Mark(int j, int group, IntArray * stack, IntArray & groups)
282  {
283  if (groups[j] == group) return;
284 
285  groups[j] = group;
286 
287  while (stack[j].Length())
288  Mark(stack[j].Pop(), group, stack, groups);
289 
290  if (j < founders) return;
291 
292  Mark(ped[path[j]].father->traverse, group, stack, groups);
293  Mark(ped[path[j]].mother->traverse, group, stack, groups);
294  }
295 */
Pedigree
Definition: Pedigree.h:32
IntArray
Definition: IntArray.h:23
Person
Definition: PedigreePerson.h:31