libStatGen Software  1
PedigreeTrim.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "Pedigree.h"
19 
20 void Pedigree::ShowTrimHeader(bool & flag)
21 {
22  if (flag)
23  {
24  printf("Trimming uninformative individuals...\n");
25  flag = false;
26  }
27 }
28 
29 void Pedigree::Trim(bool quiet, int * informative)
30 {
31  int newCount = 0;
32  Person ** newPersons = new Person * [count];
33 
34  // This function applies the following filters to reduce complexity
35  // of pedigree
36  //
37  // RULE 1: Remove all pedigrees no genotype or phenotype data
38  // RULE 2: Remove leaf individuals with no data
39  // RULE 3: Remove founder couples with <2 offspring and no data
40 
41  bool showHeader = true;
42  IntArray discardable, offspring, mates, haveData;
43 
44  for (int f = 0; f < familyCount; f++)
45  {
46  Family * fam = families[f];
47 
48  // Cache for storing indicators about whether each family member is
49  // informative
50  haveData.Dimension(fam->count);
51 
52  // Check that some data is available in the family
53  int hasData = false;
54  for (int i = fam->first; i <= fam->last; i++)
55  if (informative == NULL)
56  hasData |= haveData[persons[i]->traverse] = persons[i]->haveData();
57  else
58  hasData |= haveData[persons[i]->traverse] = informative[i];
59 
60  if (!hasData)
61  {
62  if (!quiet)
63  {
64  ShowTrimHeader(showHeader);
65  printf(" Removing family %s: No data\n", (const char *) fam->famid);
66  }
67 
68  for (int i = fam->first; i <= fam->last; i++)
69  delete persons[i];
70 
71  continue;
72  }
73 
74  // Assume that we need everyone in the family
75  discardable.Dimension(fam->count);
76  discardable.Set(0);
77 
78  bool trimming = true;
79 
80  while (trimming)
81  {
82  trimming = false;
83 
84  // Tally the number of offspring for each individual
85  offspring.Dimension(fam->count);
86  offspring.Zero();
87 
88  // Tally the number of mates for each individual
89  mates.Dimension(fam->count);
90  mates.Set(-1);
91 
92  // In the first round, we count the number of offspring
93  // for each individual in the current trimmed version of the
94  // pedigree
95  for (int i = fam->count - 1; i >= fam->founders; i--)
96  {
97  if (discardable[i]) continue;
98 
99  Person & p = *(persons[fam->path[i]]);
100 
101  if (discardable[p.father->traverse])
102  continue;
103 
104  if (offspring[i] == 0 && !haveData[p.traverse])
105  {
106  trimming = true;
107  discardable[i] = true;
108  continue;
109  }
110 
111  int father = p.father->traverse;
112  int mother = p.mother->traverse;
113 
114  if (mates[father] == -1 && mates[mother] == -1)
115  {
116  mates[father] = mother,
117  mates[mother] = father;
118  }
119  else if (mates[father] != mother)
120  {
121  if (mates[father] >= 0)
122  mates[mates[father]] = -2;
123 
124  if (mates[mother] >= 0)
125  mates[mates[mother]] = -2;
126 
127  mates[mother] = -2;
128  mates[father] = -2;
129  }
130 
131  offspring[father]++;
132  offspring[mother]++;
133  }
134 
135  // In the second pass, we remove individuals with no
136  // data who are founders with a single offspring (and
137  // no multiple matings) or who have no descendants
138  for (int i = fam->count - 1; i >= 0; i--)
139  {
140  if (discardable[i]) continue;
141 
142  Person & p = *(persons[fam->path[i]]);
143 
144  if (p.isFounder() || discardable[p.father->traverse])
145  {
146  if (mates[i] == -2 ||
147  offspring[i] > 1 ||
148  (mates[i] >= fam->founders &&
149  !discardable[persons[fam->path[mates[i]]]->father->traverse]) ||
150  haveData[p.traverse] ||
151  (mates[i] != -1 && haveData[mates[i]]))
152  continue;
153 
154  trimming = true;
155  discardable[i] = true;
156  continue;
157  }
158  }
159  }
160 
161  for (int i = fam->count - 1; i >= 0; i--)
162  if (discardable[i])
163  {
164  if (!quiet)
165  {
166  ShowTrimHeader(showHeader);
167  printf(" Removing person %s->%s: No data\n",
168  (const char *) fam->famid,
169  (const char *) persons[fam->path[i]]->pid);
170  }
171  delete persons[fam->path[i]];
172  }
173  else
174  newPersons[newCount++] = persons[fam->path[i]];
175  }
176 
177  if (!showHeader)
178  printf("\n");
179 
180  delete [] persons;
181 
182  persons = newPersons;
183  count = newCount;
184  Sort();
185 }
186 
187 
IntArray
Definition: IntArray.h:23
Family
Definition: PedigreeFamily.h:27
Person
Definition: PedigreePerson.h:31