libStatGen Software  1
QuickIndex.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "QuickIndex.h"
19 #include "Error.h"
20 
21 #define __QI_INVALID 0
22 #define __QI_VECTOR 1
23 #define __QI_INTARRAY 2
24 #define __QI_STRINGARRAY 3
25 
26 QuickIndex::QuickIndex()
27 {
28  source = NULL;
29  datatype = __QI_INVALID;
30 }
31 
32 void QuickIndex::Index(const IntArray & source_data)
33 {
34  source = (const void *) &source_data;
35  datatype = __QI_INTARRAY;
36 
37  Dimension(source_data.Length());
38  SetSequence();
39  Sort();
40 }
41 
42 void QuickIndex::Index(const Vector & source_data)
43 {
44  source = (const void *) &source_data;
45  datatype = __QI_VECTOR;
46 
47  Dimension(source_data.Length());
48  SetSequence();
49  Sort();
50 }
51 
52 void QuickIndex::Index(const StringArray & source_data)
53 {
54  source = (const void *) &source_data;
55  datatype = __QI_STRINGARRAY;
56 
57  Dimension(source_data.Length());
58  SetSequence();
59  Sort();
60 }
61 
62 void QuickIndex::IndexCounts(const StringIntMap & source_data)
63 {
64  IntArray counts(source_data.Length());
65 
66  for (int i = 0; i < source_data.Length(); i++)
67  counts[i] = source_data.GetCount(i);
68 
69  Index(counts);
70 }
71 
72 void QuickIndex::IndexCounts(const StringIntHash & source_data)
73 {
74  IntArray counts(source_data.Capacity());
75 
76  for (int i = 0; i < source_data.Capacity(); i++)
77  if (source_data.SlotInUse(i))
78  counts[i] = source_data.Integer(i);
79  else
80  counts[i] = -1;
81 
82  Index(counts);
83 
84  Reverse();
85  Dimension(source_data.Entries());
86  Reverse();
87 }
88 
89 bool QuickIndex::IsBefore(int i, int j)
90 {
91  i = (*this)[i];
92  j = (*this)[j];
93 
94  switch (datatype)
95  {
96  case __QI_VECTOR :
97  {
98  const Vector & data = * (const Vector *) source;
99  return data[i] < data[j];
100  }
101  case __QI_INTARRAY :
102  {
103  const IntArray & data = * (const IntArray *) source;
104  return data[i] < data[j];
105  }
106  case __QI_STRINGARRAY :
107  {
108  const StringArray & data = * (const StringArray *) source;
109  return data[i].SlowCompare(data[j]) < 0;
110  }
111  }
112  return 0;
113 }
114 
115 void QuickIndex::Sort()
116 {
117  struct __QuickIndexStack
118  {
119  int left, right;
120  };
121 
122  if (Length() <= 1)
123  return;
124 
125  // Create a pseudo-stack to avoid recursion
126  __QuickIndexStack stack[32];
127 
128  int stackIdx = 0;
129 
130  // Size of minimum partition to median of three
131  const int Threshold = 7;
132 
133  // current partitions
134  int lsize, rsize;
135  int l, mid, r;
136  int scanl, scanr, pivot;
137 
138  l = 0;
139  r = Length() - 1;
140 
141  while (1)
142  {
143  while (r > l)
144  {
145  if (r - l > Threshold)
146  // QuickSort : median of three partitioning
147  {
148  mid = (r + l) / 2;
149 
150  // sort l, mid, and r
151  if (IsBefore(mid, l))
152  Swap(mid, l);
153 
154  if (IsBefore(r, l))
155  Swap(r, l);
156 
157  if (IsBefore(r, mid))
158  Swap(r, mid);
159 
160  // set up for partitioning...
161  pivot = r - 1;
162 
163  Swap(mid, pivot);
164 
165  scanl = l + 1;
166  scanr = r - 2;
167  }
168  else
169  {
170  // set up random partition -- faster
171  pivot = r;
172  scanl = l;
173  scanr = r - 1;
174  }
175 
176  while (1)
177  {
178  // scan from left for element >= pivot
179  while ((scanl < r) && IsBefore(scanl, pivot))
180  ++scanl;
181 
182  while ((scanr > l) && IsBefore(pivot, scanr))
183  --scanr;
184 
185  // if scans have met, we are done
186  if (scanl >= scanr)
187  break;
188 
189  Swap(scanl, scanr);
190 
191  if (scanl < r)
192  ++scanl;
193 
194  if (scanr > l)
195  --scanr;
196  }
197 
198  // Exchange final element
199  Swap(pivot, scanl);
200 
201  // Place largest partition on stack
202  lsize = scanl - l;
203  rsize = r - scanl;
204 
205  if (lsize > rsize)
206  {
207  // if size is one we are done
208  ++ stackIdx;
209 
210  stack[stackIdx].left = l;
211  stack[stackIdx].right = scanl - 1;
212 
213  if (rsize != 0)
214  l = scanl + 1;
215  else
216  break;
217  }
218  else
219  {
220  // if size is one we are done
221  ++ stackIdx;
222 
223  stack[stackIdx].left = scanl + 1;
224  stack[stackIdx].right = r;
225 
226  if (lsize != 0)
227  r = scanl - 1;
228  else
229  break;
230  }
231  }
232 
233  // iterate with values from stack
234  if (stackIdx)
235  {
236  l = stack[stackIdx].left;
237  r = stack[stackIdx].right;
238 
239  --stackIdx;
240  }
241  else
242  break;
243  }
244 }
245 
246 
247 
248 
249 
250 
251 
StringIntMap
Definition: StringMap.h:92
StringIntHash
Definition: StringHash.h:193
IntArray
Definition: IntArray.h:23
Vector
Definition: MathVector.h:28
StringArray
Definition: StringArray.h:23