NFFT 3.5.3alpha
sort.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19
20#ifdef _OPENMP
21#include <omp.h>
22#endif
23
24#include "infft.h"
25
26#define z_swap(_a_, _b_, _t_) do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
27
33static inline void sort_node_indices_sort_bubble(INT n, INT *keys)
34{
35 INT i, j, ti;
36
37 for (i = 0; i < n; ++i)
38 {
39 j = i;
40 while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
41 {
42 z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
43 z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
44 --j;
45 }
46 }
47}
48
54static inline void sort_node_indices_radix_count(INT n, INT *keys, INT shift, INT mask, INT *counts)
55{
56 INT i, k;
57
58 for (i = 0; i < n; ++i)
59 {
60 k = (keys[2 * i + 0] >> shift) & mask;
61 ++counts[k];
62 }
63}
64
70static inline void sort_node_indices_radix_rearrange(INT n, INT *keys_in, INT *keys_out, INT shift, INT mask, INT *displs)
71{
72 INT i, k;
73
74 for (i = 0; i < n; ++i)
75 {
76 k = (keys_in[2 * i + 0] >> shift) & mask;
77 keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
78 keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
79 ++displs[k];
80 }
81}
82
83#define rwidth 9
84#define radix_n (1 << rwidth)
85
91void Y(sort_node_indices_radix_lsdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
92{
93 const INT radix_mask = radix_n - 1;
94 const INT rhigh_in = rhigh;
95
96 const INT tmax =
97#ifdef _OPENMP
98 omp_get_max_threads();
99#else
100 1;
101#endif
102
103 INT *from, *to, *tmp;
104
105 INT i, k, l, h;
106 INT *lcounts;
107
108 INT tid = 0, tnum = 1;
109
110 STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
111
112 from = keys0;
113 to = keys1;
114
115 while (rhigh >= 0)
116 {
117#ifdef _OPENMP
118 #pragma omp parallel private(tid, tnum, i, l, h)
119 {
120 tid = omp_get_thread_num();
121 tnum = omp_get_num_threads();
122#endif
123
124 for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
125
126 l = (tid * n) / tnum;
127 h = ((tid + 1) * n) / tnum;
128
129 sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
130#ifdef _OPENMP
131 }
132#endif
133
134 k = 0;
135 for (i = 0; i < radix_n; ++i)
136 {
137 for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
138 }
139
140#ifdef _OPENMP
141 #pragma omp parallel private(tid, tnum, i, l, h)
142 {
143 tid = omp_get_thread_num();
144 tnum = omp_get_num_threads();
145#endif
146
147 l = (tid * n) / tnum;
148 h = ((tid + 1) * n) / tnum;
149
150 sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
151#ifdef _OPENMP
152 }
153#endif
154
155/* print_keys(n, to);*/
156
157 tmp = from;
158 from = to;
159 to = tmp;
160
161 rhigh -= rwidth;
162 }
163
164 if (to == keys0) memcpy(to, from, (size_t)(n) * 2 * sizeof(INT));
165
166 STACK_FREE(lcounts);
167}
168
174void Y(sort_node_indices_radix_msdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
175{
176 const INT radix_mask = radix_n - 1;
177
178 const INT tmax =
179#ifdef _OPENMP
180 omp_get_max_threads();
181#else
182 1;
183#endif
184
185 INT i, k, l, h;
186 INT *lcounts;
187
188 INT counts[radix_n], displs[radix_n];
189
190 INT tid = 0, tnum = 1;
191
192 STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
193
194 rhigh -= rwidth;
195
196#ifdef _OPENMP
197 #pragma omp parallel private(tid, tnum, i, l, h)
198 {
199 tid = omp_get_thread_num();
200 tnum = omp_get_num_threads();
201#endif
202
203 for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
204
205 l = (tid * n) / tnum;
206 h = ((tid + 1) * n) / tnum;
207
208 sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
209#ifdef _OPENMP
210 }
211#endif
212
213 k = 0;
214 for (i = 0; i < radix_n; ++i)
215 {
216 for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
217
218 displs[i] = lcounts[0 * radix_n + i];
219 if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
220 }
221 counts[radix_n - 1] = n - displs[radix_n - 1];
222
223#ifdef _OPENMP
224 #pragma omp parallel private(tid, tnum, i, l, h)
225 {
226 tid = omp_get_thread_num();
227 tnum = omp_get_num_threads();
228#endif
229
230 l = (tid * n) / tnum;
231 h = ((tid + 1) * n) / tnum;
232
233 sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
234#ifdef _OPENMP
235 }
236#endif
237
238 memcpy(keys0, keys1, (size_t)(n) * 2 * sizeof(INT));
239
240 if (rhigh >= 0)
241 {
242 for (i = 0; i < radix_n; ++i)
243 {
244 if (counts[i] > 1)
245 {
246 if (counts[i] > 256)
247 Y(sort_node_indices_radix_msdf)(counts[i], keys0 + 2 * displs[i], keys1 + 2 * displs[i], rhigh);
248 else
249 sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
250 }
251 }
252 }
253
254 STACK_FREE(lcounts);
255}
Internal header file for auxiliary definitions and functions.