NFFT 3.5.3alpha
nfst.c
1/*
2 * Copyright (c) 2002, 2020 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19/* Nonequispaced fast cosine transform */
20
21/* Author: Steffen Klatt 2004-2006, Jens Keiner 2010 */
22
23/* configure header */
24#include "config.h"
25
26/* complex datatype (maybe) */
27#ifdef HAVE_COMPLEX_H
28#include<complex.h>
29#endif
30
31/* NFFT headers */
32#include "nfft3.h"
33#include "infft.h"
34
35#ifdef _OPENMP
36#include <omp.h>
37#endif
38
39#ifdef OMP_ASSERT
40#include <assert.h>
41#endif
42
43#undef X
44#define X(name) NFST(name)
45
47static inline INT intprod(const INT *vec, const INT a, const INT d)
48{
49 INT t, p;
50
51 p = 1;
52 for (t = 0; t < d; t++)
53 p *= vec[t] - a;
54
55 return p;
56}
57
58/* handy shortcuts */
59#define BASE(x) SIN(x)
60#define NN(x) (x + 1)
61#define OFFSET 1
62#define FOURIER_TRAFO FFTW_RODFT00
63#define FFTW_DEFAULT_FLAGS FFTW_ESTIMATE | FFTW_DESTROY_INPUT
64
65#define NODE(p,r) (ths->x[(p) * ths->d + (r)])
66
67#define MACRO_with_FG_PSI fg_psi[t][lj[t]]
68#define MACRO_with_PRE_PSI ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj[t]]
69#define MACRO_without_PRE_PSI PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + t]) \
70 - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
71#define MACRO_compute_PSI PHI((2 * NN(ths->n[t])), (NODE(j,t) - ((R)(lj[t] + u[t])) / (K(2.0) * ((R)NN(ths->n[t])))), t)
72
88void X(trafo_direct)(const X(plan) *ths)
89{
90 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
91
92 memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
93
94 if (ths->d == 1)
95 {
96 /* specialize for univariate case, rationale: faster */
97 INT j;
98#ifdef _OPENMP
99 #pragma omp parallel for default(shared) private(j)
100#endif
101 for (j = 0; j < ths->M_total; j++)
102 {
103 INT k_L;
104 for (k_L = 0; k_L < ths->N_total; k_L++)
105 {
106 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
107 f[j] += f_hat[k_L] * BASE(omega);
108 }
109 }
110 }
111 else
112 {
113 /* multivariate case */
114 INT j;
115#ifdef _OPENMP
116 #pragma omp parallel for default(shared) private(j)
117#endif
118 for (j = 0; j < ths->M_total; j++)
119 {
120 R x[ths->d], omega, Omega[ths->d + 1];
121 INT t, t2, k_L, k[ths->d];
122 Omega[0] = K(1.0);
123 for (t = 0; t < ths->d; t++)
124 {
125 k[t] = OFFSET;
126 x[t] = K2PI * ths->x[j * ths->d + t];
127 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
128 }
129 omega = Omega[ths->d];
130
131 for (k_L = 0; k_L < ths->N_total; k_L++)
132 {
133 f[j] += f_hat[k_L] * omega;
134 {
135 for (t = ths->d - 1; (t >= 1) && (k[t] == (ths->N[t] - 1)); t--)
136 k[t] = OFFSET;
137
138 k[t]++;
139
140 for (t2 = t; t2 < ths->d; t2++)
141 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
142
143 omega = Omega[ths->d];
144 }
145 }
146 }
147 }
148}
149
150void X(adjoint_direct)(const X(plan) *ths)
151{
152 R *f_hat = (R*)ths->f_hat, *f = (R*)ths->f;
153
154 memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
155
156 if (ths->d == 1)
157 {
158 /* specialize for univariate case, rationale: faster */
159#ifdef _OPENMP
160 INT k_L;
161 #pragma omp parallel for default(shared) private(k_L)
162 for (k_L = 0; k_L < ths->N_total; k_L++)
163 {
164 INT j;
165 for (j = 0; j < ths->M_total; j++)
166 {
167 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
168 f_hat[k_L] += f[j] * BASE(omega);
169 }
170 }
171#else
172 INT j;
173 for (j = 0; j < ths->M_total; j++)
174 {
175 INT k_L;
176 for (k_L = 0; k_L < ths->N_total; k_L++)
177 {
178 R omega = K2PI * ((R)(k_L + OFFSET)) * ths->x[j];
179 f_hat[k_L] += f[j] * BASE(omega);
180 }
181 }
182#endif
183 }
184 else
185 {
186 /* multivariate case */
187 INT j, k_L;
188#ifdef _OPENMP
189 #pragma omp parallel for default(shared) private(j, k_L)
190 for (k_L = 0; k_L < ths->N_total; k_L++)
191 {
192 INT k[ths->d], k_temp, t;
193
194 k_temp = k_L;
195
196 for (t = ths->d - 1; t >= 0; t--)
197 {
198 k[t] = k_temp % ths->N[t];
199 k_temp /= ths->N[t];
200 }
201
202 for (j = 0; j < ths->M_total; j++)
203 {
204 R omega = K(1.0);
205 for (t = 0; t < ths->d; t++)
206 omega *= BASE(K2PI * (k[t] + OFFSET) * ths->x[j * ths->d + t]);
207 f_hat[k_L] += f[j] * omega;
208 }
209 }
210#else
211 for (j = 0; j < ths->M_total; j++)
212 {
213 R x[ths->d], omega, Omega[ths->d+1];
214 INT t, t2, k[ths->d];
215 Omega[0] = K(1.0);
216 for (t = 0; t < ths->d; t++)
217 {
218 k[t] = OFFSET;
219 x[t] = K2PI * ths->x[j * ths->d + t];
220 Omega[t+1] = BASE(((R)(k[t])) * x[t]) * Omega[t];
221 }
222 omega = Omega[ths->d];
223 for (k_L = 0; k_L < ths->N_total; k_L++)
224 {
225 f_hat[k_L] += f[j] * omega;
226
227 for (t = ths->d-1; (t >= 1) && (k[t] == ths->N[t] - 1); t--)
228 k[t] = OFFSET;
229
230 k[t]++;
231
232 for (t2 = t; t2 < ths->d; t2++)
233 Omega[t2+1] = BASE(((R)(k[t2])) * x[t2]) * Omega[t2];
234
235 omega = Omega[ths->d];
236 }
237 }
238#endif
239 }
240}
241
261static inline void uo(const X(plan) *ths, const INT j, INT *up, INT *op,
262 const INT act_dim)
263{
264 const R xj = ths->x[j * ths->d + act_dim];
265 INT c = LRINT(xj * (2 * NN(ths->n[(act_dim)])));
266
267 (*up) = c - (ths->m);
268 (*op) = c + 1 + (ths->m);
269}
270
271#define MACRO_D_compute_A \
272{ \
273 g_hat[kg_plain[ths->d]] = f_hat[k_L] * c_phi_inv_k[ths->d]; \
274}
275
276#define MACRO_D_compute_T \
277{ \
278 f_hat[k_L] = g_hat[kg_plain[ths->d]] * c_phi_inv_k[ths->d]; \
279}
280
281#define MACRO_D_init_result_A memset(g_hat, 0, (size_t)(ths->n_total) * sizeof(R));
282
283#define MACRO_D_init_result_T memset(f_hat, 0, (size_t)(ths->N_total) * sizeof(R));
284
285#define MACRO_with_PRE_PHI_HUT ths->c_phi_inv[t][kg[t]]
286
287#define MACRO_compute_PHI_HUT_INV (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), kg[t] + OFFSET, t)))
288
289#define MACRO_init_k_ks \
290{ \
291 for (t = 0; t < ths->d; t++) \
292 { \
293 kg[t] = 0; \
294 } \
295 i = 0; \
296}
297
298#define MACRO_update_c_phi_inv_k(what_kind, which_phi) \
299{ \
300 for (t = i; t < ths->d; t++) \
301 { \
302 MACRO_update_c_phi_inv_k_ ## what_kind(which_phi); \
303 kg_plain[t+1] = kg_plain[t] * ths->n[t] + kg[t]; \
304 } \
305}
306
307#define MACRO_update_c_phi_inv_k_A(which_phi) \
308{ \
309 c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
310}
311
312#define MACRO_update_c_phi_inv_k_T(which_phi) \
313{ \
314 c_phi_inv_k[t+1] = K(0.5) * c_phi_inv_k[t] * MACRO_ ## which_phi; \
315}
316
317#define MACRO_count_k_ks \
318{ \
319 kg[ths->d - 1]++; \
320 i = ths->d - 1; \
321\
322 while ((kg[i] == ths->N[i] - 1) && (i > 0)) \
323 { \
324 kg[i - 1]++; \
325 kg[i] = 0; \
326 i--; \
327 } \
328}
329
330/* sub routines for the fast transforms matrix vector multiplication with D, D^T */
331#define MACRO_D(which_one) \
332static inline void D_ ## which_one (X(plan) *ths) \
333{ \
334 R *g_hat, *f_hat; /* local copy */ \
335 R c_phi_inv_k[ths->d+1]; /* postfix product of PHI_HUT */ \
336 INT t; /* index dimensions */ \
337 INT i; \
338 INT k_L; /* plain index */ \
339 INT kg[ths->d]; /* multi index in g_hat */ \
340 INT kg_plain[ths->d+1]; /* postfix plain index */ \
341\
342 f_hat = (R*)ths->f_hat; g_hat = (R*)ths->g_hat; \
343 MACRO_D_init_result_ ## which_one; \
344\
345 c_phi_inv_k[0] = K(1.0); \
346 kg_plain[0] = 0; \
347\
348 MACRO_init_k_ks; \
349\
350 if (ths->flags & PRE_PHI_HUT) \
351 { \
352 for (k_L = 0; k_L < ths->N_total; k_L++) \
353 { \
354 MACRO_update_c_phi_inv_k(which_one, with_PRE_PHI_HUT); \
355 MACRO_D_compute_ ## which_one; \
356 MACRO_count_k_ks; \
357 } \
358 } \
359 else \
360 { \
361 for (k_L = 0; k_L < ths->N_total; k_L++) \
362 { \
363 MACRO_update_c_phi_inv_k(which_one,compute_PHI_HUT_INV); \
364 MACRO_D_compute_ ## which_one; \
365 MACRO_count_k_ks; \
366 } \
367 } \
368}
369
370MACRO_D(A)
371MACRO_D(T)
372
373/* sub routines for the fast transforms matrix vector multiplication with B, B^T */
374#define MACRO_B_init_result_A memset(f, 0, (size_t)(ths->M_total) * sizeof(R));
375#define MACRO_B_init_result_T memset(g, 0, (size_t)(ths->n_total) * sizeof(R));
376
377#define MACRO_B_PRE_FULL_PSI_compute_A \
378{ \
379 (*fj) += ths->psi[ix] * g[ths->psi_index_g[ix]]; \
380}
381
382#define MACRO_B_PRE_FULL_PSI_compute_T \
383{ \
384 g[ths->psi_index_g[ix]] += ths->psi[ix] * (*fj); \
385}
386
387#define MACRO_B_compute_A \
388{ \
389 (*fj) += phi_prod[ths->d] * g[ll_plain[ths->d]]; \
390}
391
392#define MACRO_B_compute_T \
393{ \
394 g[ll_plain[ths->d]] += phi_prod[ths->d] * (*fj); \
395}
396
397#define MACRO_init_uo_l_lj_t \
398{ \
399 for (t2 = 0; t2 < ths->d; t2++) \
400 { \
401 uo(ths, j, &u[t2], &o[t2], t2); \
402 \
403 /* determine index in g-array corresponding to u[(t2)] */ \
404 if (u[(t2)] < 0) \
405 lg_offset[(t2)] = \
406 (u[(t2)] % (2 * NN(ths->n[(t2)]))) + (2 * NN(ths->n[(t2)])); \
407 else \
408 lg_offset[(t2)] = u[(t2)] % (2 * NN(ths->n[(t2)])); \
409 if (lg_offset[(t2)] > NN(ths->n[(t2)])) \
410 lg_offset[(t2)] = -(2 * NN(ths->n[(t2)]) - lg_offset[(t2)]); \
411 \
412 if (lg_offset[t2] <= 0) \
413 { \
414 l[t2] = -lg_offset[t2]; \
415 count_lg[t2] = -1; \
416 } \
417 else \
418 { \
419 l[t2] = +lg_offset[t2]; \
420 count_lg[t2] = +1; \
421 } \
422 \
423 lj[t2] = 0; \
424 } \
425 t2 = 0; \
426}
427
428#define FOO_A ((R)count_lg[t])
429
430#define FOO_T ((R)count_lg[t])
431
432#define MACRO_update_phi_prod_ll_plain(which_one,which_psi) \
433{ \
434 for (t = t2; t < ths->d; t++) \
435 { \
436 if ((l[t] != 0) && (l[t] != NN(ths->n[t]))) \
437 { \
438 phi_prod[t+1] = (FOO_ ## which_one) * phi_prod[t] * (MACRO_ ## which_psi); \
439 ll_plain[t+1] = ll_plain[t] * ths->n[t] + l[t] - 1; \
440 } \
441 else \
442 { \
443 phi_prod[t + 1] = K(0.0); \
444 ll_plain[t+1] = ll_plain[t] * ths->n[t]; \
445 } \
446 } \
447}
448
449#define MACRO_count_uo_l_lj_t \
450{ \
451 /* turn around if we hit one of the boundaries */ \
452 if ((l[(ths->d-1)] == 0) || (l[(ths->d-1)] == NN(ths->n[(ths->d-1)]))) \
453 count_lg[(ths->d-1)] *= -1; \
454 \
455 /* move array index */ \
456 l[(ths->d-1)] += count_lg[(ths->d-1)]; \
457 \
458 lj[ths->d - 1]++; \
459 t2 = ths->d - 1; \
460 \
461 while ((lj[t2] == (2 * ths->m + 2)) && (t2 > 0)) \
462 { \
463 lj[t2 - 1]++; \
464 lj[t2] = 0; \
465 /* ansonsten lg[i-1] verschieben */ \
466 \
467 /* turn around if we hit one of the boundaries */ \
468 if ((l[(t2 - 1)] == 0) || (l[(t2 - 1)] == NN(ths->n[(t2 - 1)]))) \
469 count_lg[(t2 - 1)] *= -1; \
470 /* move array index */ \
471 l[(t2 - 1)] += count_lg[(t2 - 1)]; \
472 \
473 /* lg[i] = anfangswert */ \
474 if (lg_offset[t2] <= 0) \
475 { \
476 l[t2] = -lg_offset[t2]; \
477 count_lg[t2] = -1; \
478 } \
479 else \
480 { \
481 l[t2] = +lg_offset[t2]; \
482 count_lg[t2] = +1; \
483 } \
484 \
485 t2--; \
486 } \
487}
488
489#define MACRO_B(which_one) \
490static inline void B_ ## which_one (X(plan) *ths) \
491{ \
492 INT lprod; /* 'regular bandwidth' of matrix B */ \
493 INT u[ths->d], o[ths->d]; /* multi band with respect to x_j */ \
494 INT t, t2; /* index dimensions */ \
495 INT j; /* index nodes */ \
496 INT l_L, ix; /* index one row of B */ \
497 INT l[ths->d]; /* multi index u<=l<=o (real index of g in array) */ \
498 INT lj[ths->d]; /* multi index 0<=lc<2m+2 */ \
499 INT ll_plain[ths->d+1]; /* postfix plain index in g */ \
500 R phi_prod[ths->d+1]; /* postfix product of PHI */ \
501 R *f, *g; /* local copy */ \
502 R *fj; /* local copy */ \
503 R y[ths->d]; \
504 R fg_psi[ths->d][2*ths->m+2]; \
505 R fg_exp_l[ths->d][2*ths->m+2]; \
506 INT l_fg,lj_fg; \
507 R tmpEXP1, tmpEXP2, tmpEXP2sq, tmp1, tmp2, tmp3; \
508 R ip_w; \
509 INT ip_u; \
510 INT ip_s = ths->K/(ths->m+2); \
511 INT lg_offset[ths->d]; /* offset in g according to u */ \
512 INT count_lg[ths->d]; /* count summands (2m+2) */ \
513\
514 f = (R*)ths->f; g = (R*)ths->g; \
515\
516 MACRO_B_init_result_ ## which_one \
517\
518 if (ths->flags & PRE_FULL_PSI) \
519 { \
520 for (ix = 0, j = 0, fj = f; j < ths->M_total; j++, fj++) \
521 { \
522 for (l_L = 0; l_L < ths->psi_index_f[j]; l_L++, ix++) \
523 { \
524 MACRO_B_PRE_FULL_PSI_compute_ ## which_one; \
525 } \
526 } \
527 return; \
528 } \
529\
530 phi_prod[0] = K(1.0); \
531 ll_plain[0] = 0; \
532\
533 for (t = 0, lprod = 1; t < ths->d; t++) \
534 lprod *= (2 * ths->m + 2); \
535\
536 if (ths->flags & PRE_PSI) \
537 { \
538 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
539 { \
540 MACRO_init_uo_l_lj_t; \
541 \
542 for (l_L = 0; l_L < lprod; l_L++) \
543 { \
544 MACRO_update_phi_prod_ll_plain(which_one, with_PRE_PSI); \
545 \
546 MACRO_B_compute_ ## which_one; \
547 \
548 MACRO_count_uo_l_lj_t; \
549 } /* for(l_L) */ \
550 } /* for(j) */ \
551 return; \
552 } /* if(PRE_PSI) */ \
553 \
554 if (ths->flags & PRE_FG_PSI) \
555 { \
556 for (t = 0; t < ths->d; t++) \
557 { \
558 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
559 tmpEXP2sq = tmpEXP2 * tmpEXP2; \
560 tmp2 = K(1.0); \
561 tmp3 = K(1.0); \
562 fg_exp_l[t][0] = K(1.0); \
563 \
564 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
565 { \
566 tmp3 = tmp2 * tmpEXP2; \
567 tmp2 *= tmpEXP2sq; \
568 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
569 } \
570 } \
571 \
572 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
573 { \
574 MACRO_init_uo_l_lj_t; \
575 \
576 for (t = 0; t < ths->d; t++) \
577 { \
578 fg_psi[t][0] = ths->psi[2 * (j * ths->d + t)]; \
579 tmpEXP1 = ths->psi[2 * (j * ths->d + t) + 1]; \
580 tmp1 = K(1.0); \
581 \
582 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
583 { \
584 tmp1 *= tmpEXP1; \
585 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
586 } \
587 } \
588 \
589 for (l_L= 0; l_L < lprod; l_L++) \
590 { \
591 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
592 \
593 MACRO_B_compute_ ## which_one; \
594 \
595 MACRO_count_uo_l_lj_t; \
596 } \
597 } \
598 return; \
599 } \
600 \
601 if (ths->flags & FG_PSI) \
602 { \
603 for (t = 0; t < ths->d; t++) \
604 { \
605 tmpEXP2 = EXP(K(-1.0) / ths->b[t]); \
606 tmpEXP2sq = tmpEXP2 * tmpEXP2; \
607 tmp2 = K(1.0); \
608 tmp3 = K(1.0); \
609 fg_exp_l[t][0] = K(1.0); \
610 for (lj_fg = 1; lj_fg <= (2 * ths->m + 2); lj_fg++) \
611 { \
612 tmp3 = tmp2 * tmpEXP2; \
613 tmp2 *= tmpEXP2sq; \
614 fg_exp_l[t][lj_fg] = fg_exp_l[t][lj_fg-1] * tmp3; \
615 } \
616 } \
617 \
618 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
619 { \
620 MACRO_init_uo_l_lj_t; \
621 \
622 for (t = 0; t < ths->d; t++) \
623 { \
624 fg_psi[t][0] = (PHI((2 * NN(ths->n[t])), (ths->x[j*ths->d+t] - ((R)u[t])/(2 * NN(ths->n[t]))),(t)));\
625 \
626 tmpEXP1 = EXP(K(2.0) * ((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u[t]) / ths->b[t]); \
627 tmp1 = K(1.0); \
628 for (l_fg = u[t] + 1, lj_fg = 1; l_fg <= o[t]; l_fg++, lj_fg++) \
629 { \
630 tmp1 *= tmpEXP1; \
631 fg_psi[t][lj_fg] = fg_psi[t][0] * tmp1 * fg_exp_l[t][lj_fg]; \
632 } \
633 } \
634 \
635 for (l_L = 0; l_L < lprod; l_L++) \
636 { \
637 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
638 \
639 MACRO_B_compute_ ## which_one; \
640 \
641 MACRO_count_uo_l_lj_t; \
642 } \
643 } \
644 return; \
645 } \
646 \
647 if (ths->flags & PRE_LIN_PSI) \
648 { \
649 for (j = 0, fj = f; j < ths->M_total; j++, fj++) \
650 { \
651 MACRO_init_uo_l_lj_t; \
652 \
653 for (t = 0; t < ths->d; t++) \
654 { \
655 y[t] = (((2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - (R)u[t]) \
656 * ((R)ths->K))/(ths->m + 2); \
657 ip_u = LRINT(FLOOR(y[t])); \
658 ip_w = y[t]-ip_u; \
659 for (l_fg = u[t], lj_fg = 0; l_fg <= o[t]; l_fg++, lj_fg++) \
660 { \
661 fg_psi[t][lj_fg] = ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s)] \
662 * (1-ip_w) + ths->psi[(ths->K+1)*t + ABS(ip_u-lj_fg*ip_s+1)] \
663 * (ip_w); \
664 } \
665 } \
666 \
667 for (l_L = 0; l_L < lprod; l_L++) \
668 { \
669 MACRO_update_phi_prod_ll_plain(which_one, with_FG_PSI); \
670 \
671 MACRO_B_compute_ ## which_one; \
672 \
673 MACRO_count_uo_l_lj_t; \
674 } /* for(l_L) */ \
675 } /* for(j) */ \
676 return; \
677 } /* if(PRE_LIN_PSI) */ \
678 \
679 /* no precomputed psi at all */ \
680 for (j = 0, fj = &f[0]; j < ths->M_total; j++, fj += 1) \
681 { \
682 MACRO_init_uo_l_lj_t; \
683 \
684 for (l_L = 0; l_L < lprod; l_L++) \
685 { \
686 MACRO_update_phi_prod_ll_plain(which_one, without_PRE_PSI); \
687 \
688 MACRO_B_compute_ ## which_one; \
689 \
690 MACRO_count_uo_l_lj_t; \
691 } /* for (l_L) */ \
692 } /* for (j) */ \
693} /* B */
694
695MACRO_B(A)
696MACRO_B(T)
697
698
701void X(trafo)(X(plan) *ths)
702{
703 switch(ths->d)
704 {
705 default:
706 {
707 /* use ths->my_fftw_r2r_plan */
708 ths->g_hat = ths->g1;
709 ths->g = ths->g2;
710
711 /* form \f$ \hat g_k = \frac{\hat f_k}{c_k\left(\phi\right)} \text{ for }
712 * k \in I_N \f$ */
713 TIC(0)
714 D_A(ths);
715 TOC(0)
716
717 /* Compute by d-variate discrete Fourier transform
718 * \f$ g_l = \sum_{k \in I_N} \hat g_k {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
719 * \text{ for } l \in I_n \f$ */
720 TIC_FFTW(1)
721 FFTW(execute)(ths->my_fftw_r2r_plan);
722 TOC_FFTW(1)
723
724 /*if (ths->flags & PRE_FULL_PSI)
725 full_psi__A(ths);*/
726
727 /* Set \f$ f_j = \sum_{l \in I_n,m(x_j)} g_l \psi\left(x_j-\frac{l}{n}\right)
728 * \text{ for } j=0,\dots,M-1 \f$ */
729 TIC(2)
730 B_A(ths);
731 TOC(2)
732
733 /*if (ths->flags & PRE_FULL_PSI)
734 {
735 Y(free)(ths->psi_index_g);
736 Y(free)(ths->psi_index_f);
737 }*/
738 }
739 }
740} /* trafo */
741
742void X(adjoint)(X(plan) *ths)
743{
744 switch(ths->d)
745 {
746 default:
747 {
748 /* use ths->my_fftw_plan */
749 ths->g_hat = ths->g2;
750 ths->g = ths->g1;
751
752 /*if (ths->flags & PRE_FULL_PSI)
753 full_psi__T(ths);*/
754
755 /* Set \f$ g_l = \sum_{j=0}^{M-1} f_j \psi\left(x_j-\frac{l}{n}\right)
756 * \text{ for } l \in I_n,m(x_j) \f$ */
757 TIC(2)
758 B_T(ths);
759 TOC(2)
760
761 /* Compute by d-variate discrete cosine transform
762 * \f$ \hat g_k = \sum_{l \in I_n} g_l {\rm e}^{-2\pi {\rm i} \frac{kl}{n}}
763 * \text{ for } k \in I_N\f$ */
764 TIC_FFTW(1)
765 FFTW(execute)(ths->my_fftw_r2r_plan);
766 TOC_FFTW(1)
767
768 /* Form \f$ \hat f_k = \frac{\hat g_k}{c_k\left(\phi\right)} \text{ for }
769 * k \in I_N \f$ */
770 TIC(0)
771 D_T(ths);
772 TOC(0)
773 }
774 }
775} /* adjoint */
776
779static inline void precompute_phi_hut(X(plan) *ths)
780{
781 INT ks[ths->d]; /* index over all frequencies */
782 INT t; /* index over all dimensions */
783
784 ths->c_phi_inv = (R**) Y(malloc)((size_t)(ths->d) * sizeof(R*));
785
786 for (t = 0; t < ths->d; t++)
787 {
788 ths->c_phi_inv[t] = (R*)Y(malloc)((size_t)(ths->N[t] - OFFSET) * sizeof(R));
789
790 for (ks[t] = 0; ks[t] < ths->N[t] - OFFSET; ks[t]++)
791 {
792 ths->c_phi_inv[t][ks[t]] = (K(1.0) / (PHI_HUT((2 * NN(ths->n[t])), ks[t] + OFFSET, t)));
793 }
794 }
795} /* phi_hut */
796
802void X(precompute_lin_psi)(X(plan) *ths)
803{
804 INT t;
805 INT j;
806 R step;
808 for (t = 0; t < ths->d; t++)
809 {
810 step = ((R)(ths->m+2)) / (((R)ths->K) * (2 * NN(ths->n[t])));
811
812 for (j = 0; j <= ths->K; j++)
813 {
814 ths->psi[(ths->K + 1) * t + j] = PHI((2 * NN(ths->n[t])), (j * step), t);
815 } /* for(j) */
816 } /* for(t) */
817}
818
819void X(precompute_fg_psi)(X(plan) *ths)
820{
821 INT t; /* index over all dimensions */
822 INT u, o; /* depends on x_j */
823
824// sort(ths);
825
826 for (t = 0; t < ths->d; t++)
827 {
828 INT j;
829// #pragma omp parallel for default(shared) private(j,u,o)
830 for (j = 0; j < ths->M_total; j++)
831 {
832 uo(ths, j, &u, &o, t);
833
834 ths->psi[2 * (j*ths->d + t)] = (PHI((2 * NN(ths->n[t])),(ths->x[j * ths->d + t] - ((R)u) / (2 * NN(ths->n[t]))),(t)));
835 ths->psi[2 * (j*ths->d + t) + 1] = EXP(K(2.0) * ( (2 * NN(ths->n[t])) * ths->x[j * ths->d + t] - u) / ths->b[t]);
836 } /* for(j) */
837 }
838 /* for(t) */
839} /* nfft_precompute_fg_psi */
840
841void X(precompute_psi)(X(plan) *ths)
842{
843 INT t; /* index over all dimensions */
844 INT lj; /* index 0<=lj<u+o+1 */
845 INT u, o; /* depends on x_j */
846
847 //sort(ths);
848
849 for (t = 0; t < ths->d; t++)
850 {
851 INT j;
852
853 for (j = 0; j < ths->M_total; j++)
854 {
855 uo(ths, j, &u, &o, t);
856
857 for(lj = 0; lj < (2 * ths->m + 2); lj++)
858 ths->psi[(j * ths->d + t) * (2 * ths->m + 2) + lj] =
859 (PHI((2 * NN(ths->n[t])), ((ths->x[(j) * ths->d + (t)]) - ((R)(lj + u)) / (K(2.0) * ((R)NN(ths->n[t])))), t));
860 } /* for (j) */
861 } /* for (t) */
862} /* precompute_psi */
863
864void X(precompute_full_psi)(X(plan) *ths)
865{
866//#ifdef _OPENMP
867// sort(ths);
868//
869// nfft_precompute_full_psi_omp(ths);
870//#else
871 INT t, t2; /* index over all dimensions */
872 INT j; /* index over all nodes */
873 INT l_L; /* plain index 0 <= l_L < lprod */
874 INT l[ths->d]; /* multi index u<=l<=o */
875 INT lj[ths->d]; /* multi index 0<=lj<u+o+1 */
876 INT ll_plain[ths->d+1]; /* postfix plain index */
877 INT lprod; /* 'bandwidth' of matrix B */
878 INT u[ths->d], o[ths->d]; /* depends on x_j */
879 INT count_lg[ths->d];
880 INT lg_offset[ths->d];
881
882 R phi_prod[ths->d+1];
883
884 INT ix, ix_old;
885
886 //sort(ths);
887
888 phi_prod[0] = K(1.0);
889 ll_plain[0] = 0;
890
891 for (t = 0, lprod = 1; t < ths->d; t++)
892 lprod *= 2 * ths->m + 2;
893
894 for (j = 0, ix = 0, ix_old = 0; j < ths->M_total; j++)
895 {
896 MACRO_init_uo_l_lj_t;
897
898 for (l_L = 0; l_L < lprod; l_L++, ix++)
899 {
900 MACRO_update_phi_prod_ll_plain(A, without_PRE_PSI);
901
902 ths->psi_index_g[ix] = ll_plain[ths->d];
903 ths->psi[ix] = phi_prod[ths->d];
904
905 MACRO_count_uo_l_lj_t;
906 } /* for (l_L) */
907
908 ths->psi_index_f[j] = ix - ix_old;
909 ix_old = ix;
910 } /* for(j) */
911//#endif
912}
913
914void X(precompute_one_psi)(X(plan) *ths)
915{
916 if(ths->flags & PRE_PSI)
917 X(precompute_psi)(ths);
918 if(ths->flags & PRE_FULL_PSI)
919 X(precompute_full_psi)(ths);
920 if(ths->flags & PRE_FG_PSI)
921 X(precompute_fg_psi)(ths);
922 if(ths->flags & PRE_LIN_PSI)
923 X(precompute_lin_psi)(ths);
924}
925
926static inline void init_help(X(plan) *ths)
927{
928 INT t; /* index over all dimensions */
929 INT lprod; /* 'bandwidth' of matrix B */
930
931 if (ths->flags & NFFT_OMP_BLOCKWISE_ADJOINT)
932 ths->flags |= NFFT_SORT_NODES;
933
934 ths->N_total = intprod(ths->N, OFFSET, ths->d);
935 ths->n_total = intprod(ths->n, 0, ths->d);
936
937 ths->sigma = (R*)Y(malloc)((size_t)(ths->d) * sizeof(R));
938
939 for (t = 0; t < ths->d; t++)
940 ths->sigma[t] = ((R)NN(ths->n[t])) / ths->N[t];
941
942 /* Assign r2r transform kinds for each dimension */
943 ths->r2r_kind = (FFTW(r2r_kind)*)Y(malloc)((size_t)(ths->d) * sizeof (FFTW(r2r_kind)));
944 for (t = 0; t < ths->d; t++)
945 ths->r2r_kind[t] = FOURIER_TRAFO;
946
947 WINDOW_HELP_INIT;
948
949 if (ths->flags & MALLOC_X)
950 ths->x = (R*)Y(malloc)((size_t)(ths->d * ths->M_total) * sizeof(R));
951
952 if (ths->flags & MALLOC_F_HAT)
953 ths->f_hat = (R*)Y(malloc)((size_t)(ths->N_total) * sizeof(R));
954
955 if (ths->flags & MALLOC_F)
956 ths->f = (R*)Y(malloc)((size_t)(ths->M_total) * sizeof(R));
957
958 if (ths->flags & PRE_PHI_HUT)
959 precompute_phi_hut(ths);
960
961 if(ths->flags & PRE_LIN_PSI)
962 {
963 ths->K = (1U<< 10) * (ths->m+2);
964 ths->psi = (R*) Y(malloc)((size_t)((ths->K + 1) * ths->d) * sizeof(R));
965 }
966
967 if(ths->flags & PRE_FG_PSI)
968 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * 2) * sizeof(R));
969
970 if (ths->flags & PRE_PSI)
971 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * ths->d * (2 * ths->m + 2 )) * sizeof(R));
972
973 if(ths->flags & PRE_FULL_PSI)
974 {
975 for (t = 0, lprod = 1; t < ths->d; t++)
976 lprod *= 2 * ths->m + 2;
977
978 ths->psi = (R*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(R));
979
980 ths->psi_index_f = (INT*) Y(malloc)((size_t)(ths->M_total) * sizeof(INT));
981 ths->psi_index_g = (INT*) Y(malloc)((size_t)(ths->M_total * lprod) * sizeof(INT));
982 }
983
984 if (ths->flags & FFTW_INIT)
985 {
986 ths->g1 = (R*)Y(malloc)((size_t)(ths->n_total) * sizeof(R));
987
988 if (ths->flags & FFT_OUT_OF_PLACE)
989 ths->g2 = (R*) Y(malloc)((size_t)(ths->n_total) * sizeof(R));
990 else
991 ths->g2 = ths->g1;
992
993 {
994 int *_n = Y(malloc)((size_t)(ths->d) * sizeof(int));
995
996 for (t = 0; t < ths->d; t++)
997 _n[t] = (int)(ths->n[t]);
998
999 ths->my_fftw_r2r_plan = FFTW(plan_r2r)((int)ths->d, _n, ths->g1, ths->g2, ths->r2r_kind, ths->fftw_flags);
1000 Y(free)(_n);
1001 }
1002 }
1003
1004// if(ths->flags & NFFT_SORT_NODES)
1005// ths->index_x = (INT*) Y(malloc)(sizeof(INT)*2*ths->M_total);
1006// else
1007// ths->index_x = NULL;
1008
1009 ths->mv_trafo = (void (*) (void* ))X(trafo);
1010 ths->mv_adjoint = (void (*) (void* ))X(adjoint);
1011}
1012
1013void X(init)(X(plan) *ths, int d, int *N, int M_total)
1014{
1015 int t; /* index over all dimensions */
1016
1017 ths->d = (INT)d;
1018
1019 ths->N = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1020
1021 for (t = 0; t < d; t++)
1022 ths->N[t] = (INT)N[t];
1023
1024 ths->M_total = (INT)M_total;
1025
1026 ths->n = (INT*) Y(malloc)((size_t)(d) * sizeof(INT));
1027
1028 for (t = 0; t < d; t++)
1029 ths->n[t] = 2 * (Y(next_power_of_2)(ths->N[t]) - 1) + OFFSET;
1030
1031 ths->m = WINDOW_HELP_ESTIMATE_m;
1032
1033 if (d > 1)
1034 {
1035//#ifdef _OPENMP
1036// ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1037// FFTW_INIT | NFFT_SORT_NODES | NFFT_OMP_BLOCKWISE_ADJOINT;
1038//#else
1039 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1040 FFTW_INIT | NFFT_SORT_NODES;
1041//#endif
1042 }
1043 else
1044 ths->flags = PRE_PHI_HUT | PRE_PSI | MALLOC_X| MALLOC_F_HAT | MALLOC_F |
1046
1047 ths->fftw_flags = FFTW_ESTIMATE | FFTW_DESTROY_INPUT;
1048
1049 init_help(ths);
1050}
1051
1052void X(init_guru)(X(plan) *ths, int d, int *N, int M_total, int *n, int m,
1053 unsigned flags, unsigned fftw_flags)
1054{
1055 INT t; /* index over all dimensions */
1056
1057 ths->d = (INT)d;
1058 ths->M_total = (INT)M_total;
1059 ths->N = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1060
1061 for (t = 0; t < d; t++)
1062 ths->N[t] = (INT)N[t];
1063
1064 ths->n = (INT*)Y(malloc)((size_t)(ths->d) * sizeof(INT));
1065
1066 for (t = 0; t < d; t++)
1067 ths->n[t] = (INT)n[t];
1068
1069 ths->m = (INT)m;
1070
1071 ths->flags = flags;
1072 ths->fftw_flags = fftw_flags;
1073
1074 init_help(ths);
1075}
1076
1077void X(init_1d)(X(plan) *ths, int N1, int M_total)
1078{
1079 int N[1];
1080
1081 N[0] = N1;
1082
1083 X(init)(ths, 1, N, M_total);
1084}
1085
1086void X(init_2d)(X(plan) *ths, int N1, int N2, int M_total)
1087{
1088 int N[2];
1089
1090 N[0] = N1;
1091 N[1] = N2;
1092
1093 X(init)(ths, 2, N, M_total);
1094}
1095
1096void X(init_3d)(X(plan) *ths, int N1, int N2, int N3, int M_total)
1097{
1098 int N[3];
1099
1100 N[0] = N1;
1101 N[1] = N2;
1102 N[2] = N3;
1103
1104 X(init)(ths, 3, N, M_total);
1105}
1106
1107const char* X(check)(X(plan) *ths)
1108{
1109 INT j;
1110
1111 if (!ths->f)
1112 return "Member f not initialized.";
1113
1114 if (!ths->x)
1115 return "Member x not initialized.";
1116
1117 if (!ths->f_hat)
1118 return "Member f_hat not initialized.";
1119
1120 for (j = 0; j < ths->M_total * ths->d; j++)
1121 {
1122 if ((ths->x[j] < K(0.0)) || (ths->x[j] >= K(0.5)))
1123 {
1124 return "ths->x out of range [0.0,0.5)";
1125 }
1126 }
1127
1128 for (j = 0; j < ths->d; j++)
1129 {
1130 if (ths->sigma[j] <= 1)
1131 return "Oversampling factor too small";
1132
1133 if(ths->N[j] - 1 <= ths->m)
1134 return "Polynomial degree N is smaller than cut-off m";
1135 }
1136 return 0;
1137}
1138
1139void X(finalize)(X(plan) *ths)
1140{
1141 INT t; /* index over dimensions */
1142
1143// if(ths->flags & NFFT_SORT_NODES)
1144// Y(free)(ths->index_x);
1145
1146 if (ths->flags & FFTW_INIT)
1147 {
1148#ifdef _OPENMP
1149 #pragma omp critical (nfft_omp_critical_fftw_plan)
1150#endif
1151 FFTW(destroy_plan)(ths->my_fftw_r2r_plan);
1152
1153 if (ths->flags & FFT_OUT_OF_PLACE)
1154 Y(free)(ths->g2);
1155
1156 Y(free)(ths->g1);
1157 }
1158
1159 if(ths->flags & PRE_FULL_PSI)
1160 {
1161 Y(free)(ths->psi_index_g);
1162 Y(free)(ths->psi_index_f);
1163 Y(free)(ths->psi);
1164 }
1165
1166 if (ths->flags & PRE_PSI)
1167 Y(free)(ths->psi);
1168
1169 if(ths->flags & PRE_FG_PSI)
1170 Y(free)(ths->psi);
1171
1172 if(ths->flags & PRE_LIN_PSI)
1173 Y(free)(ths->psi);
1174
1175 if (ths->flags & PRE_PHI_HUT)
1176 {
1177 for (t = 0; t < ths->d; t++)
1178 Y(free)(ths->c_phi_inv[t]);
1179 Y(free)(ths->c_phi_inv);
1180 }
1181
1182 if (ths->flags & MALLOC_F)
1183 Y(free)(ths->f);
1184
1185 if(ths->flags & MALLOC_F_HAT)
1186 Y(free)(ths->f_hat);
1187
1188 if (ths->flags & MALLOC_X)
1189 Y(free)(ths->x);
1190
1191 WINDOW_HELP_FINALIZE;
1192
1193 Y(free)(ths->N);
1194 Y(free)(ths->n);
1195 Y(free)(ths->sigma);
1196
1197 Y(free)(ths->r2r_kind);
1198} /* finalize */
#define MALLOC_F_HAT
Definition nfft3.h:194
#define MALLOC_X
Definition nfft3.h:193
#define PRE_FULL_PSI
Definition nfft3.h:192
#define FFT_OUT_OF_PLACE
Definition nfft3.h:196
#define PRE_PSI
Definition nfft3.h:191
#define PRE_FG_PSI
Definition nfft3.h:190
#define MALLOC_F
Definition nfft3.h:195
#define PRE_LIN_PSI
Definition nfft3.h:189
#define FFTW_INIT
Definition nfft3.h:197
#define PRE_PHI_HUT
Definition nfft3.h:187
#define TIC(a)
Timing, method works since the inaccurate timer is updated mostly in the measured function.
Definition infft.h:1400
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.