NFFT  3.3.0
fpt.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id$ */
20 
27 #include "config.h"
28 
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdbool.h>
32 #include <math.h>
33 #ifdef HAVE_COMPLEX_H
34 #include <complex.h>
35 #endif
36 
37 #include "nfft3.h"
38 #include "infft.h"
39 
40 /* Macros for index calculation. */
41 
43 #define K_START_TILDE(x,y) (MAX(MIN(x,y-2),0))
44 
46 #define K_END_TILDE(x,y) MIN(x,y-1)
47 
49 #define FIRST_L(x,y) (LRINT(floor((x)/(double)y)))
50 
52 #define LAST_L(x,y) (LRINT(ceil(((x)+1)/(double)y))-1)
53 
54 #define N_TILDE(y) (y-1)
55 
56 #define IS_SYMMETRIC(x,y,z) (x >= ((y-1.0)/z))
57 
58 #define FPT_BREAK_EVEN 4
59 
63 typedef struct fpt_step_
64 {
65  bool stable;
68  int Ns;
69  int ts;
70  double **a11,**a12,**a21,**a22;
71  double *g;
72 } fpt_step;
73 
77 typedef struct fpt_data_
78 {
80  int k_start;
81  double *alphaN;
82  double *betaN;
83  double *gammaN;
84  double alpha_0;
85  double beta_0;
86  double gamma_m1;
87  /* Data for direct transform. */
88  double *_alpha;
89  double *_beta;
90  double *_gamma;
91 } fpt_data;
92 
96 typedef struct fpt_set_s_
97 {
98  int flags;
99  int M;
100  int N;
102  int t;
104  double **xcvecs;
107  double *xc;
108  double _Complex *temp;
109  double _Complex *work;
110  double _Complex *result;
111  double _Complex *vec3;
112  double _Complex *vec4;
113  double _Complex *z;
114  fftw_plan *plans_dct3;
116  fftw_plan *plans_dct2;
118  fftw_r2r_kind *kinds;
120  fftw_r2r_kind *kindsr;
123  int *lengths;
125  /* Data for slow transforms. */
126  double *xc_slow;
127 } fpt_set_s;
128 
129 static inline void abuvxpwy(double a, double b, double _Complex* u, double _Complex* x,
130  double* v, double _Complex* y, double* w, int n)
131 {
132  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
133  double *v_ptr = v, *w_ptr = w;
134  for (l = 0; l < n; l++)
135  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
136 }
137 
138 #define ABUVXPWY_SYMMETRIC(NAME,S1,S2) \
139 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
140  double* v, double _Complex* y, double* w, int n) \
141 { \
142  const int n2 = n>>1; \
143  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
144  double *v_ptr = v, *w_ptr = w; \
145  for (l = 0; l < n2; l++) \
146  *u_ptr++ = a * (b * (*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++)); \
147  v_ptr--; w_ptr--; \
148  for (l = 0; l < n2; l++) \
149  *u_ptr++ = a * (b * S1 * (*v_ptr--) * (*x_ptr++) + S2 * (*w_ptr--) * (*y_ptr++)); \
150 }
151 
152 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric1,1.0,-1.0)
153 ABUVXPWY_SYMMETRIC(abuvxpwy_symmetric2,-1.0,1.0)
154 
155 #define ABUVXPWY_SYMMETRIC_1(NAME,S1) \
156 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
157  double* v, double _Complex* y, int n, double *xx) \
158 { \
159  const int n2 = n>>1; \
160  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
161  double *v_ptr = v, *xx_ptr = xx; \
162  for (l = 0; l < n2; l++, v_ptr++) \
163  *u_ptr++ = a * (b * (*v_ptr) * (*x_ptr++) + ((*v_ptr)*(1.0+*xx_ptr++)) * (*y_ptr++)); \
164  v_ptr--; \
165  for (l = 0; l < n2; l++, v_ptr--) \
166  *u_ptr++ = a * (b * S1 * (*v_ptr) * (*x_ptr++) + (S1 * (*v_ptr) * (1.0+*xx_ptr++)) * (*y_ptr++)); \
167 }
168 
169 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_1,1.0)
170 ABUVXPWY_SYMMETRIC_1(abuvxpwy_symmetric1_2,-1.0)
171 
172 #define ABUVXPWY_SYMMETRIC_2(NAME,S1) \
173 static inline void NAME(double a, double b, double _Complex* u, double _Complex* x, \
174  double _Complex* y, double* w, int n, double *xx) \
175 { \
176  const int n2 = n>>1; \
177  int l; double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y; \
178  double *w_ptr = w, *xx_ptr = xx; \
179  for (l = 0; l < n2; l++, w_ptr++) \
180  *u_ptr++ = a * (b * (*w_ptr/(1.0+*xx_ptr++)) * (*x_ptr++) + (*w_ptr) * (*y_ptr++)); \
181  w_ptr--; \
182  for (l = 0; l < n2; l++, w_ptr--) \
183  *u_ptr++ = a * (b * (S1 * (*w_ptr)/(1.0+*xx_ptr++) ) * (*x_ptr++) + S1 * (*w_ptr) * (*y_ptr++)); \
184 }
185 
186 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_1,1.0)
187 ABUVXPWY_SYMMETRIC_2(abuvxpwy_symmetric2_2,-1.0)
188 
189 static inline void auvxpwy(double a, double _Complex* u, double _Complex* x, double* v,
190  double _Complex* y, double* w, int n)
191 {
192  int l;
193  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
194  double *v_ptr = v, *w_ptr = w;
195  for (l = n; l > 0; l--)
196  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
197 }
198 
199 static inline void auvxpwy_symmetric(double a, double _Complex* u, double _Complex* x,
200  double* v, double _Complex* y, double* w, int n)
201 {
202  const int n2 = n>>1; \
203  int l;
204  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
205  double *v_ptr = v, *w_ptr = w;
206  for (l = n2; l > 0; l--)
207  *u_ptr++ = a * ((*v_ptr++) * (*x_ptr++) + (*w_ptr++) * (*y_ptr++));
208  v_ptr--; w_ptr--;
209  for (l = n2; l > 0; l--)
210  *u_ptr++ = a * ((*v_ptr--) * (*x_ptr++) - (*w_ptr--) * (*y_ptr++));
211 }
212 
213 static inline void auvxpwy_symmetric_1(double a, double _Complex* u, double _Complex* x,
214  double* v, double _Complex* y, double* w, int n, double *xx)
215 {
216  const int n2 = n>>1; \
217  int l;
218  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
219  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
220  for (l = n2; l > 0; l--, xx_ptr++)
221  *u_ptr++ = a * (((*v_ptr++)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)*(1.0+*xx_ptr)) * (*y_ptr++));
222  v_ptr--; w_ptr--;
223  for (l = n2; l > 0; l--, xx_ptr++)
224  *u_ptr++ = a * (-((*v_ptr--)*(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)*(1.0+*xx_ptr)) * (*y_ptr++));
225 }
226 
227 static inline void auvxpwy_symmetric_2(double a, double _Complex* u, double _Complex* x,
228  double* v, double _Complex* y, double* w, int n, double *xx)
229 {
230  const int n2 = n>>1; \
231  int l;
232  double _Complex *u_ptr = u, *x_ptr = x, *y_ptr = y;
233  double *v_ptr = v, *w_ptr = w, *xx_ptr = xx;
234  for (l = n2; l > 0; l--, xx_ptr++)
235  *u_ptr++ = a * (((*v_ptr++)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr++)/(1.0+*xx_ptr)) * (*y_ptr++));
236  v_ptr--; w_ptr--;
237  for (l = n2; l > 0; l--, xx_ptr++)
238  *u_ptr++ = a * (-((*v_ptr--)/(1.0+*xx_ptr)) * (*x_ptr++) + ((*w_ptr--)/(1.0+*xx_ptr)) * (*y_ptr++));
239 }
240 
241 #define FPT_DO_STEP(NAME,M1_FUNCTION,M2_FUNCTION) \
242 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, double *a12, \
243  double *a21, double *a22, double g, int tau, fpt_set set) \
244 { \
245  \
246  int length = 1<<(tau+1); \
247  \
248  double norm = 1.0/(length<<1); \
249  \
250  /* Compensate for factors introduced by a raw DCT-III. */ \
251  a[0] *= 2.0; \
252  b[0] *= 2.0; \
253  \
254  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
255  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
256  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
257  \
258  /*for (k = 0; k < length; k++)*/ \
259  /*{*/ \
260  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/ \
261  /* a11[k],a12[k],a21[k],a22[k]);*/ \
262  /*}*/ \
263  \
264  /* Check, if gamma is zero. */ \
265  if (g == 0.0) \
266  { \
267  /*fprintf(stderr,"gamma == 0!\n");*/ \
268  /* Perform multiplication only for second row. */ \
269  M2_FUNCTION(norm,b,b,a22,a,a21,length); \
270  } \
271  else \
272  { \
273  /*fprintf(stderr,"gamma != 0!\n");*/ \
274  /* Perform multiplication for both rows. */ \
275  M2_FUNCTION(norm,set->z,b,a22,a,a21,length); \
276  M1_FUNCTION(norm*g,a,a,a11,b,a12,length); \
277  memcpy(b,set->z,length*sizeof(double _Complex)); \
278  /* Compute Chebyshev-coefficients using a DCT-II. */ \
279  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
280  /* Compensate for factors introduced by a raw DCT-II. */ \
281  a[0] *= 0.5; \
282  } \
283  \
284  /* Compute Chebyshev-coefficients using a DCT-II. */ \
285  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
286  /* Compensate for factors introduced by a raw DCT-II. */ \
287  b[0] *= 0.5; \
288 }
289 
290 FPT_DO_STEP(fpt_do_step,auvxpwy,auvxpwy)
291 FPT_DO_STEP(fpt_do_step_symmetric,auvxpwy_symmetric,auvxpwy_symmetric)
292 /*FPT_DO_STEP(fpt_do_step_symmetric_u,auvxpwy_symmetric,auvxpwy)
293 FPT_DO_STEP(fpt_do_step_symmetric_l,auvxpwy,auvxpwy_symmetric)*/
294 
295 static inline void fpt_do_step_symmetric_u(double _Complex *a, double _Complex *b,
296  double *a11, double *a12, double *a21, double *a22, double *x,
297  double gam, int tau, fpt_set set)
298 {
300  int length = 1<<(tau+1);
302  double norm = 1.0/(length<<1);
303 
304  UNUSED(a21); UNUSED(a22);
305 
306  /* Compensate for factors introduced by a raw DCT-III. */
307  a[0] *= 2.0;
308  b[0] *= 2.0;
309 
310  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
311  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
312  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
313 
314  /*for (k = 0; k < length; k++)*/
315  /*{*/
316  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
317  /* a11[k],a12[k],a21[k],a22[k]);*/
318  /*}*/
319 
320  /* Check, if gamma is zero. */
321  if (gam == 0.0)
322  {
323  /*fprintf(stderr,"gamma == 0!\n");*/
324  /* Perform multiplication only for second row. */
325  auvxpwy_symmetric_1(norm,b,b,a12,a,a11,length,x);
326  }
327  else
328  {
329  /*fprintf(stderr,"gamma != 0!\n");*/
330  /* Perform multiplication for both rows. */
331  auvxpwy_symmetric_1(norm,set->z,b,a12,a,a11,length,x);
332  auvxpwy_symmetric(norm*gam,a,a,a11,b,a12,length);
333  memcpy(b,set->z,length*sizeof(double _Complex));
334  /* Compute Chebyshev-coefficients using a DCT-II. */
335  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
336  /* Compensate for factors introduced by a raw DCT-II. */
337  a[0] *= 0.5;
338  }
339 
340  /* Compute Chebyshev-coefficients using a DCT-II. */
341  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
342  /* Compensate for factors introduced by a raw DCT-II. */
343  b[0] *= 0.5;
344 }
345 
346 static inline void fpt_do_step_symmetric_l(double _Complex *a, double _Complex *b,
347  double *a11, double *a12, double *a21, double *a22, double *x, double gam, int tau, fpt_set set)
348 {
350  int length = 1<<(tau+1);
352  double norm = 1.0/(length<<1);
353 
354  /* Compensate for factors introduced by a raw DCT-III. */
355  a[0] *= 2.0;
356  b[0] *= 2.0;
357 
358  UNUSED(a11); UNUSED(a12);
359 
360  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
361  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
362  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
363 
364  /*for (k = 0; k < length; k++)*/
365  /*{*/
366  /*fprintf(stderr,"fpt_do_step: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",*/
367  /* a11[k],a12[k],a21[k],a22[k]);*/
368  /*}*/
369 
370  /* Check, if gamma is zero. */
371  if (gam == 0.0)
372  {
373  /*fprintf(stderr,"gamma == 0!\n");*/
374  /* Perform multiplication only for second row. */
375  auvxpwy_symmetric(norm,b,b,a22,a,a21,length);
376  }
377  else
378  {
379  /*fprintf(stderr,"gamma != 0!\n");*/
380  /* Perform multiplication for both rows. */
381  auvxpwy_symmetric(norm,set->z,b,a22,a,a21,length);
382  auvxpwy_symmetric_2(norm*gam,a,a,a21,b,a22,length,x);
383  memcpy(b,set->z,length*sizeof(double _Complex));
384  /* Compute Chebyshev-coefficients using a DCT-II. */
385  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
386  /* Compensate for factors introduced by a raw DCT-II. */
387  a[0] *= 0.5;
388  }
389 
390  /* Compute Chebyshev-coefficients using a DCT-II. */
391  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
392  /* Compensate for factors introduced by a raw DCT-II. */
393  b[0] *= 0.5;
394 }
395 
396 #define FPT_DO_STEP_TRANSPOSED(NAME,M1_FUNCTION,M2_FUNCTION) \
397 static inline void NAME(double _Complex *a, double _Complex *b, double *a11, \
398  double *a12, double *a21, double *a22, double g, int tau, fpt_set set) \
399 { \
400  \
401  int length = 1<<(tau+1); \
402  \
403  double norm = 1.0/(length<<1); \
404  \
405  /* Compute function values from Chebyshev-coefficients using a DCT-III. */ \
406  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a); \
407  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b); \
408  \
409  /* Perform matrix multiplication. */ \
410  M1_FUNCTION(norm,g,set->z,a,a11,b,a21,length); \
411  M2_FUNCTION(norm,g,b,a,a12,b,a22,length); \
412  memcpy(a,set->z,length*sizeof(double _Complex)); \
413  \
414  /* Compute Chebyshev-coefficients using a DCT-II. */ \
415  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a); \
416  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b); \
417 }
418 
419 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t,abuvxpwy,abuvxpwy)
420 FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric,abuvxpwy_symmetric1,abuvxpwy_symmetric2)
421 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_u,abuvxpwy_symmetric1_1,abuvxpwy_symmetric1_2)*/
422 /*FPT_DO_STEP_TRANSPOSED(fpt_do_step_t_symmetric_l,abuvxpwy_symmetric2_2,abuvxpwy_symmetric2_1)*/
423 
424 
425 static inline void fpt_do_step_t_symmetric_u(double _Complex *a,
426  double _Complex *b, double *a11, double *a12, double *x,
427  double gam, int tau, fpt_set set)
428 {
430  int length = 1<<(tau+1);
432  double norm = 1.0/(length<<1);
433 
434  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
435  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
436  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
437 
438  /* Perform matrix multiplication. */
439  abuvxpwy_symmetric1_1(norm,gam,set->z,a,a11,b,length,x);
440  abuvxpwy_symmetric1_2(norm,gam,b,a,a12,b,length,x);
441  memcpy(a,set->z,length*sizeof(double _Complex));
442 
443  /* Compute Chebyshev-coefficients using a DCT-II. */
444  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
445  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
446 }
447 
448 static inline void fpt_do_step_t_symmetric_l(double _Complex *a,
449  double _Complex *b, double *a21, double *a22,
450  double *x, double gam, int tau, fpt_set set)
451 {
453  int length = 1<<(tau+1);
455  double norm = 1.0/(length<<1);
456 
457  /* Compute function values from Chebyshev-coefficients using a DCT-III. */
458  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)a,(double*)a);
459  fftw_execute_r2r(set->plans_dct3[tau-1],(double*)b,(double*)b);
460 
461  /* Perform matrix multiplication. */
462  abuvxpwy_symmetric2_2(norm,gam,set->z,a,b,a21,length,x);
463  abuvxpwy_symmetric2_1(norm,gam,b,a,b,a22,length,x);
464  memcpy(a,set->z,length*sizeof(double _Complex));
465 
466  /* Compute Chebyshev-coefficients using a DCT-II. */
467  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)a,(double*)a);
468  fftw_execute_r2r(set->plans_dct2[tau-1],(double*)b,(double*)b);
469 }
470 
471 
472 static void eval_clenshaw(const double *x, double *y, int size, int k, const double *alpha,
473  const double *beta, const double *gam)
474 {
475  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
476  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
477  */
478  int i,j;
479  double a,b,x_val_act,a_old;
480  const double *x_act;
481  double *y_act;
482  const double *alpha_act, *beta_act, *gamma_act;
483 
484  /* Traverse all nodes. */
485  x_act = x;
486  y_act = y;
487  for (i = 0; i < size; i++)
488  {
489  a = 1.0;
490  b = 0.0;
491  x_val_act = *x_act;
492 
493  if (k == 0)
494  {
495  *y_act = 1.0;
496  }
497  else
498  {
499  alpha_act = &(alpha[k]);
500  beta_act = &(beta[k]);
501  gamma_act = &(gam[k]);
502  for (j = k; j > 1; j--)
503  {
504  a_old = a;
505  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
506  b = a_old*(*gamma_act);
507  alpha_act--;
508  beta_act--;
509  gamma_act--;
510  }
511  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
512  }
513  x_act++;
514  y_act++;
515  }
516 }
517 
518 static void eval_clenshaw2(const double *x, double *z, double *y, int size1, int size, int k, const double *alpha,
519  const double *beta, const double *gam)
520 {
521  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
522  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
523  */
524  int i,j;
525  double a,b,x_val_act,a_old;
526  const double *x_act;
527  double *y_act, *z_act;
528  const double *alpha_act, *beta_act, *gamma_act;
529 
530  /* Traverse all nodes. */
531  x_act = x;
532  y_act = y;
533  z_act = z;
534  for (i = 0; i < size; i++)
535  {
536  a = 1.0;
537  b = 0.0;
538  x_val_act = *x_act;
539 
540  if (k == 0)
541  {
542  *y_act = 1.0;
543  *z_act = 0.0;
544  }
545  else
546  {
547  alpha_act = &(alpha[k]);
548  beta_act = &(beta[k]);
549  gamma_act = &(gam[k]);
550  for (j = k; j > 1; j--)
551  {
552  a_old = a;
553  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
554  b = a_old*(*gamma_act);
555  alpha_act--;
556  beta_act--;
557  gamma_act--;
558  }
559  if (i < size1)
560  *z_act = a;
561  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
562  }
563 
564  x_act++;
565  y_act++;
566  z_act++;
567  }
568  /*gamma_act++;
569  FILE *f = fopen("/Users/keiner/Desktop/nfsft_debug.txt","a");
570  fprintf(f,"size1: %10d, size: %10d\n",size1,size);
571  fclose(f);*/
572 }
573 
574 static int eval_clenshaw_thresh2(const double *x, double *z, double *y, int size, int k,
575  const double *alpha, const double *beta, const double *gam, const
576  double threshold)
577 {
578  /* Evaluate the associated Legendre polynomial P_{k,nleg} (l,x) for the vector
579  * of knots x[0], ..., x[size-1] by the Clenshaw algorithm
580  */
581  int i,j;
582  double a,b,x_val_act,a_old;
583  const double *x_act;
584  double *y_act, *z_act;
585  const double *alpha_act, *beta_act, *gamma_act;
586  R max = -nfft_float_property(NFFT_R_MAX);
587  const R t = LOG10(FABS(threshold));
588 
589  /* Traverse all nodes. */
590  x_act = x;
591  y_act = y;
592  z_act = z;
593  for (i = 0; i < size; i++)
594  {
595  a = 1.0;
596  b = 0.0;
597  x_val_act = *x_act;
598 
599  if (k == 0)
600  {
601  *y_act = 1.0;
602  *z_act = 0.0;
603  }
604  else
605  {
606  alpha_act = &(alpha[k]);
607  beta_act = &(beta[k]);
608  gamma_act = &(gam[k]);
609  for (j = k; j > 1; j--)
610  {
611  a_old = a;
612  a = b + a_old*((*alpha_act)*x_val_act+(*beta_act));
613  b = a_old*(*gamma_act);
614  alpha_act--;
615  beta_act--;
616  gamma_act--;
617  }
618  *z_act = a;
619  *y_act = (a*((*alpha_act)*x_val_act+(*beta_act))+b);
620  max = FMAX(max,LOG10(FABS(*y_act)));
621  if (max > t)
622  return 1;
623  }
624  x_act++;
625  y_act++;
626  z_act++;
627  }
628  return 0;
629 }
630 
631 static inline void eval_sum_clenshaw_fast(const int N, const int M,
632  const double _Complex *a, const double *x, double _Complex *y,
633  const double *alpha, const double *beta, const double *gam,
634  const double lambda)
635 {
636  int j,k;
637  double _Complex tmp1, tmp2, tmp3;
638  double xc;
639 
640  /*fprintf(stderr, "Executing eval_sum_clenshaw_fast.\n");
641  fprintf(stderr, "Before transform:\n");
642  for (j = 0; j < N; j++)
643  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
644  for (j = 0; j <= M; j++)
645  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]);*/
646 
647  if (N == 0)
648  for (j = 0; j <= M; j++)
649  y[j] = a[0];
650  else
651  {
652  for (j = 0; j <= M; j++)
653  {
654 #if 0
655  xc = x[j];
656  tmp2 = a[N];
657  tmp1 = a[N-1] + (alpha[N-1] * xc + beta[N-1])*tmp2;
658  for (k = N-1; k > 0; k--)
659  {
660  tmp3 = a[k-1]
661  + (alpha[k-1] * xc + beta[k-1]) * tmp1
662  + gam[k] * tmp2;
663  tmp2 = tmp1;
664  tmp1 = tmp3;
665  }
666  y[j] = lambda * tmp1;
667 #else
668  xc = x[j];
669  tmp1 = a[N-1];
670  tmp2 = a[N];
671  for (k = N-1; k > 0; k--)
672  {
673  tmp3 = a[k-1] + tmp2 * gam[k];
674  tmp2 *= (alpha[k] * xc + beta[k]);
675  tmp2 += tmp1;
676  tmp1 = tmp3;
677  /*if (j == 1515)
678  {
679  fprintf(stderr, "k = %d, tmp1 = %e, tmp2 = %e.\n", k, tmp1, tmp2);
680  }*/
681  }
682  tmp2 *= (alpha[0] * xc + beta[0]);
683  //fprintf(stderr, "alpha[0] = %e, beta[0] = %e.\n", alpha[0], beta[0]);
684  y[j] = lambda * (tmp2 + tmp1);
685  //fprintf(stderr, "lambda = %e.\n", lambda);
686 #endif
687  }
688  }
689  /*fprintf(stderr, "Before transform:\n");
690  for (j = 0; j < N; j++)
691  fprintf(stderr, "a[%4d] = %e.\n", j, a[j]);
692  for (j = 0; j <= M; j++)
693  fprintf(stderr, "x[%4d] = %e, y[%4d] = %e.\n", j, x[j], j, y[j]); */
694 }
695 
714 static void eval_sum_clenshaw_transposed(int N, int M, double _Complex* a, double *x,
715  double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam,
716  double lambda)
717 {
718  int j,k;
719  double _Complex* it1 = temp;
720  double _Complex* it2 = y;
721  double _Complex aux;
722 
723  /* Compute final result by multiplying with the constant lambda */
724  a[0] = 0.0;
725  for (j = 0; j <= M; j++)
726  {
727  it2[j] = lambda * y[j];
728  a[0] += it2[j];
729  }
730 
731  if (N > 0)
732  {
733  /* Compute final step. */
734  a[1] = 0.0;
735  for (j = 0; j <= M; j++)
736  {
737  it1[j] = it2[j];
738  it2[j] = it2[j] * (alpha[0] * x[j] + beta[0]);
739  a[1] += it2[j];
740  }
741 
742  for (k = 2; k <= N; k++)
743  {
744  a[k] = 0.0;
745  for (j = 0; j <= M; j++)
746  {
747  aux = it1[j];
748  it1[j] = it2[j];
749  it2[j] = it2[j]*(alpha[k-1] * x[j] + beta[k-1]) + gam[k-1] * aux;
750  a[k] += it2[j];
751  }
752  }
753  }
754 }
755 
756 
757 fpt_set fpt_init(const int M, const int t, const unsigned int flags)
758 {
760  int plength;
762  int tau;
764  int m;
765  int k;
766 #ifdef _OPENMP
767  int nthreads = X(get_num_threads)();
768 #endif
769 
770  /* Allocate memory for new DPT set. */
771  fpt_set_s *set = (fpt_set_s*)nfft_malloc(sizeof(fpt_set_s));
772 
773  /* Save parameters in structure. */
774  set->flags = flags;
775 
776  set->M = M;
777  set->t = t;
778  set->N = 1<<t;
779 
780  /* Allocate memory for M transforms. */
781  set->dpt = (fpt_data*) nfft_malloc(M*sizeof(fpt_data));
782 
783  /* Initialize with NULL pointer. */
784  for (m = 0; m < set->M; m++)
785  set->dpt[m].steps = 0;
786 
787  /* Create arrays with Chebyshev nodes. */
788 
789  /* Initialize array with Chebyshev coefficients for the polynomial x. This
790  * would be trivially an array containing a 1 as second entry with all other
791  * coefficients set to zero. In order to compensate for the multiplicative
792  * factor 2 introduced by the DCT-III, we set this coefficient to 0.5 here. */
793 
794  /* Allocate memory for array of pointers to node arrays. */
795  set->xcvecs = (double**) nfft_malloc((set->t)*sizeof(double*));
796  /* For each polynomial length starting with 4, compute the Chebyshev nodes
797  * using a DCT-III. */
798  plength = 4;
799  for (tau = 1; tau < t+1; tau++)
800  {
801  /* Allocate memory for current array. */
802  set->xcvecs[tau-1] = (double*) nfft_malloc(plength*sizeof(double));
803  for (k = 0; k < plength; k++)
804  {
805  set->xcvecs[tau-1][k] = cos(((k+0.5)*KPI)/plength);
806  }
807  plength = plength << 1;
808  }
809 
811  set->work = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
812  set->result = (double _Complex*) nfft_malloc((2*set->N)*sizeof(double _Complex));
813 
814  set->lengths = (int*) nfft_malloc((set->t/*-1*/)*sizeof(int));
815  set->plans_dct2 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
816  set->kindsr = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
817  set->kindsr[0] = FFTW_REDFT10;
818  set->kindsr[1] = FFTW_REDFT10;
819  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
820  {
821  set->lengths[tau] = plength;
822 #ifdef _OPENMP
823 #pragma omp critical (nfft_omp_critical_fftw_plan)
824 {
825  fftw_plan_with_nthreads(nthreads);
826 #endif
827  set->plans_dct2[tau] =
828  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
829  2, 1, (double*)set->result, NULL, 2, 1,set->kindsr,
830  0);
831 #ifdef _OPENMP
832 }
833 #endif
834  }
835 
836  /* Check if fast transform is activated. */
837  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
838  {
840  set->vec3 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
841  set->vec4 = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
842  set->z = (double _Complex*) nfft_malloc(set->N*sizeof(double _Complex));
843 
845  set->plans_dct3 = (fftw_plan*) nfft_malloc(sizeof(fftw_plan)*(set->t/*-1*/));
846  set->kinds = (fftw_r2r_kind*) nfft_malloc(2*sizeof(fftw_r2r_kind));
847  set->kinds[0] = FFTW_REDFT01;
848  set->kinds[1] = FFTW_REDFT01;
849  for (tau = 0, plength = 4; tau < set->t/*-1*/; tau++, plength<<=1)
850  {
851  set->lengths[tau] = plength;
852 #ifdef _OPENMP
853 #pragma omp critical (nfft_omp_critical_fftw_plan)
854 {
855  fftw_plan_with_nthreads(nthreads);
856 #endif
857  set->plans_dct3[tau] =
858  fftw_plan_many_r2r(1, &set->lengths[tau], 2, (double*)set->work, NULL,
859  2, 1, (double*)set->result, NULL, 2, 1, set->kinds,
860  0);
861 #ifdef _OPENMP
862 }
863 #endif
864  }
865  nfft_free(set->lengths);
866  nfft_free(set->kinds);
867  nfft_free(set->kindsr);
868  set->lengths = NULL;
869  set->kinds = NULL;
870  set->kindsr = NULL;
871  }
872 
873  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
874  {
875  set->xc_slow = (double*) nfft_malloc((set->N+1)*sizeof(double));
876  set->temp = (double _Complex*) nfft_malloc((set->N+1)*sizeof(double _Complex));
877  }
878 
879  /* Return the newly created DPT set. */
880  return set;
881 }
882 
883 void fpt_precompute(fpt_set set, const int m, double *alpha, double *beta,
884  double *gam, int k_start, const double threshold)
885 {
886 
887  int tau;
888  int l;
889  int plength;
891  int degree;
893  int firstl;
894  int lastl;
895  int plength_stab;
897  int degree_stab;
899  double *a11;
901  double *a12;
903  double *a21;
905  double *a22;
907  const double *calpha;
908  const double *cbeta;
909  const double *cgamma;
910  int needstab = 0;
911  int k_start_tilde;
912  int N_tilde;
913  int clength;
914  int clength_1;
915  int clength_2;
916  int t_stab, N_stab;
917  fpt_data *data;
918 
919  /* Get pointer to DPT data. */
920  data = &(set->dpt[m]);
921 
922  /* Check, if already precomputed. */
923  if (data->steps != NULL)
924  return;
925 
926  /* Save k_start. */
927  data->k_start = k_start;
928 
929  data->gamma_m1 = gam[0];
930 
931  /* Check if fast transform is activated. */
932  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
933  {
934  /* Save recursion coefficients. */
935  data->alphaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
936  data->betaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
937  data->gammaN = (double*) nfft_malloc((set->t-1)*sizeof(double _Complex));
938 
939  for (tau = 2; tau <= set->t; tau++)
940  {
941 
942  data->alphaN[tau-2] = alpha[1<<tau];
943  data->betaN[tau-2] = beta[1<<tau];
944  data->gammaN[tau-2] = gam[1<<tau];
945  }
946 
947  data->alpha_0 = alpha[1];
948  data->beta_0 = beta[1];
949 
950  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
951  /*set->N*/);
952  N_tilde = N_TILDE(set->N);
953 
954  /* Allocate memory for the cascade with t = log_2(N) many levels. */
955  data->steps = (fpt_step**) nfft_malloc(sizeof(fpt_step*)*set->t);
956 
957  /* For tau = 1,...t compute the matrices U_{n,tau,l}. */
958  plength = 4;
959  for (tau = 1; tau < set->t; tau++)
960  {
961  /* Compute auxilliary values. */
962  degree = plength>>1;
963  /* Compute first l. */
964  firstl = FIRST_L(k_start_tilde,plength);
965  /* Compute last l. */
966  lastl = LAST_L(N_tilde,plength);
967 
968  /* Allocate memory for current level. This level will contain 2^{t-tau-1}
969  * many matrices. */
970  data->steps[tau] = (fpt_step*) nfft_malloc(sizeof(fpt_step)
971  * (lastl+1));
972 
973  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
974  for (l = firstl; l <= lastl; l++)
975  {
976  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
977  {
978  //fprintf(stderr,"fpt_precompute(%d): symmetric step\n",m);
979  //fflush(stderr);
980  clength = plength/2;
981  }
982  else
983  {
984  clength = plength;
985  }
986 
987  /* Allocate memory for the components of U_{n,tau,l}. */
988  a11 = (double*) nfft_malloc(sizeof(double)*clength);
989  a12 = (double*) nfft_malloc(sizeof(double)*clength);
990  a21 = (double*) nfft_malloc(sizeof(double)*clength);
991  a22 = (double*) nfft_malloc(sizeof(double)*clength);
992 
993  /* Evaluate the associated polynomials at the 2^{tau+1} Chebyshev
994  * nodes. */
995 
996  /* Get the pointers to the three-term recurrence coeffcients. */
997  calpha = &(alpha[plength*l+1+1]);
998  cbeta = &(beta[plength*l+1+1]);
999  cgamma = &(gam[plength*l+1+1]);
1000 
1001  if (set->flags & FPT_NO_STABILIZATION)
1002  {
1003  /* Evaluate P_{2^{tau}-2}^n(\cdot,2^{tau+1}l+2). */
1004  calpha--;
1005  cbeta--;
1006  cgamma--;
1007  eval_clenshaw2(set->xcvecs[tau-1], a11, a21, clength, clength, degree-1, calpha, cbeta,
1008  cgamma);
1009  eval_clenshaw2(set->xcvecs[tau-1], a12, a22, clength, clength, degree, calpha, cbeta,
1010  cgamma);
1011  needstab = 0;
1012  }
1013  else
1014  {
1015  calpha--;
1016  cbeta--;
1017  cgamma--;
1018  /* Evaluate P_{2^{tau}-1}^n(\cdot,2^{tau+1}l+1). */
1019  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a11, a21, clength,
1020  degree-1, calpha, cbeta, cgamma, threshold);
1021  if (needstab == 0)
1022  {
1023  /* Evaluate P_{2^{tau}}^n(\cdot,2^{tau+1}l+1). */
1024  needstab = eval_clenshaw_thresh2(set->xcvecs[tau-1], a12, a22, clength,
1025  degree, calpha, cbeta, cgamma, threshold);
1026  }
1027  }
1028 
1029 
1030  /* Check if stabilization needed. */
1031  if (needstab == 0)
1032  {
1033  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1034  data->steps[tau][l].a12 = (double**) nfft_malloc(sizeof(double*));
1035  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1036  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1037  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1038  /* No stabilization needed. */
1039  data->steps[tau][l].a11[0] = a11;
1040  data->steps[tau][l].a12[0] = a12;
1041  data->steps[tau][l].a21[0] = a21;
1042  data->steps[tau][l].a22[0] = a22;
1043  data->steps[tau][l].g[0] = gam[plength*l+1+1];
1044  data->steps[tau][l].stable = true;
1045  }
1046  else
1047  {
1048  /* Stabilize. */
1049  degree_stab = degree*(2*l+1);
1050  X(next_power_of_2_exp)((l+1)*(1<<(tau+1)),&N_stab,&t_stab);
1051 
1052  /* Old arrays are to small. */
1053  nfft_free(a11);
1054  nfft_free(a12);
1055  nfft_free(a21);
1056  nfft_free(a22);
1057 
1058  data->steps[tau][l].a11 = (double**) nfft_malloc(sizeof(double*));
1059  data->steps[tau][l].a12 = (double**)nfft_malloc(sizeof(double*));
1060  data->steps[tau][l].a21 = (double**) nfft_malloc(sizeof(double*));
1061  data->steps[tau][l].a22 = (double**) nfft_malloc(sizeof(double*));
1062  data->steps[tau][l].g = (double*) nfft_malloc(sizeof(double));
1063 
1064  plength_stab = N_stab;
1065 
1066  if (set->flags & FPT_AL_SYMMETRY)
1067  {
1068  if (m <= 1)
1069  {
1070  /* This should never be executed */
1071  clength_1 = plength_stab;
1072  clength_2 = plength_stab;
1073  /* Allocate memory for arrays. */
1074  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1075  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1076  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1077  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1078  /* Get the pointers to the three-term recurrence coeffcients. */
1079  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1080  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1,
1081  clength_2, degree_stab-1, calpha, cbeta, cgamma);
1082  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1,
1083  clength_2, degree_stab+0, calpha, cbeta, cgamma);
1084  }
1085  else
1086  {
1087  clength = plength_stab/2;
1088  if (m%2 == 0)
1089  {
1090  a11 = (double*) nfft_malloc(sizeof(double)*clength);
1091  a12 = (double*) nfft_malloc(sizeof(double)*clength);
1092  a21 = 0;
1093  a22 = 0;
1094  calpha = &(alpha[2]); cbeta = &(beta[2]); cgamma = &(gam[2]);
1095  eval_clenshaw(set->xcvecs[t_stab-2], a11, clength,
1096  degree_stab-2, calpha, cbeta, cgamma);
1097  eval_clenshaw(set->xcvecs[t_stab-2], a12, clength,
1098  degree_stab-1, calpha, cbeta, cgamma);
1099  }
1100  else
1101  {
1102  a11 = 0;
1103  a12 = 0;
1104  a21 = (double*) nfft_malloc(sizeof(double)*clength);
1105  a22 = (double*) nfft_malloc(sizeof(double)*clength);
1106  calpha = &(alpha[1]); cbeta = &(beta[1]); cgamma = &(gam[1]);
1107  eval_clenshaw(set->xcvecs[t_stab-2], a21, clength,
1108  degree_stab-1,calpha, cbeta, cgamma);
1109  eval_clenshaw(set->xcvecs[t_stab-2], a22, clength,
1110  degree_stab+0, calpha, cbeta, cgamma);
1111  }
1112  }
1113  }
1114  else
1115  {
1116  clength_1 = plength_stab;
1117  clength_2 = plength_stab;
1118  a11 = (double*) nfft_malloc(sizeof(double)*clength_1);
1119  a12 = (double*) nfft_malloc(sizeof(double)*clength_1);
1120  a21 = (double*) nfft_malloc(sizeof(double)*clength_2);
1121  a22 = (double*) nfft_malloc(sizeof(double)*clength_2);
1122  calpha = &(alpha[2]);
1123  cbeta = &(beta[2]);
1124  cgamma = &(gam[2]);
1125  calpha--;
1126  cbeta--;
1127  cgamma--;
1128  eval_clenshaw2(set->xcvecs[t_stab-2], a11, a21, clength_1, clength_2, degree_stab-1,
1129  calpha, cbeta, cgamma);
1130  eval_clenshaw2(set->xcvecs[t_stab-2], a12, a22, clength_1, clength_2, degree_stab+0,
1131  calpha, cbeta, cgamma);
1132 
1133  }
1134  data->steps[tau][l].a11[0] = a11;
1135  data->steps[tau][l].a12[0] = a12;
1136  data->steps[tau][l].a21[0] = a21;
1137  data->steps[tau][l].a22[0] = a22;
1138 
1139  data->steps[tau][l].g[0] = gam[1+1];
1140  data->steps[tau][l].stable = false;
1141  data->steps[tau][l].ts = t_stab;
1142  data->steps[tau][l].Ns = N_stab;
1143  }
1144  }
1146  plength = plength << 1;
1147  }
1148  }
1149 
1150  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1151  {
1152  /* Check, if recurrence coefficients must be copied. */
1153  if (set->flags & FPT_PERSISTENT_DATA)
1154  {
1155  data->_alpha = (double*) alpha;
1156  data->_beta = (double*) beta;
1157  data->_gamma = (double*) gam;
1158  }
1159  else
1160  {
1161  data->_alpha = (double*) nfft_malloc((set->N+1)*sizeof(double));
1162  data->_beta = (double*) nfft_malloc((set->N+1)*sizeof(double));
1163  data->_gamma = (double*) nfft_malloc((set->N+1)*sizeof(double));
1164  memcpy(data->_alpha,alpha,(set->N+1)*sizeof(double));
1165  memcpy(data->_beta,beta,(set->N+1)*sizeof(double));
1166  memcpy(data->_gamma,gam,(set->N+1)*sizeof(double));
1167  }
1168  }
1169 }
1170 
1171 void fpt_trafo_direct(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1172  const int k_end, const unsigned int flags)
1173 {
1174  int j;
1175  fpt_data *data = &(set->dpt[m]);
1176  int Nk;
1177  int tk;
1178  double norm;
1179 
1180  //fprintf(stderr, "Executing dpt.\n");
1181 
1182  X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1183  norm = 2.0/(Nk<<1);
1184 
1185  //fprintf(stderr, "Norm = %e.\n", norm);
1186 
1187  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1188  {
1189  return;
1190  }
1191 
1192  if (flags & FPT_FUNCTION_VALUES)
1193  {
1194  /* Fill array with Chebyshev nodes. */
1195  for (j = 0; j <= k_end; j++)
1196  {
1197  set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1198  //fprintf(stderr, "x[%4d] = %e.\n", j, set->xc_slow[j]);
1199  }
1200 
1201  memset(set->result,0U,data->k_start*sizeof(double _Complex));
1202  memcpy(&set->result[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1203 
1204  /*eval_sum_clenshaw(k_end, k_end, set->result, set->xc_slow,
1205  y, set->work, &data->alpha[1], &data->beta[1], &data->gamma[1],
1206  data->gamma_m1);*/
1207  eval_sum_clenshaw_fast(k_end, k_end, set->result, set->xc_slow,
1208  y, &data->_alpha[1], &data->_beta[1], &data->_gamma[1], data->gamma_m1);
1209  }
1210  else
1211  {
1212  memset(set->temp,0U,data->k_start*sizeof(double _Complex));
1213  memcpy(&set->temp[data->k_start],x,(k_end-data->k_start+1)*sizeof(double _Complex));
1214 
1215  eval_sum_clenshaw_fast(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1216  set->result, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1217  data->gamma_m1);
1218 
1219  fftw_execute_r2r(set->plans_dct2[tk-2],(double*)set->result,
1220  (double*)set->result);
1221 
1222  set->result[0] *= 0.5;
1223  for (j = 0; j < Nk; j++)
1224  {
1225  set->result[j] *= norm;
1226  }
1227 
1228  memcpy(y,set->result,(k_end+1)*sizeof(double _Complex));
1229  }
1230 }
1231 
1232 void fpt_trafo(fpt_set set, const int m, const double _Complex *x, double _Complex *y,
1233  const int k_end, const unsigned int flags)
1234 {
1235  /* Get transformation data. */
1236  fpt_data *data = &(set->dpt[m]);
1238  int Nk;
1240  int tk;
1242  int k_start_tilde;
1244  int k_end_tilde;
1245 
1247  int tau;
1249  int firstl;
1251  int lastl;
1253  int l;
1255  int plength;
1257  int plength_stab;
1258  int t_stab;
1260  fpt_step *step;
1262  fftw_plan plan = 0;
1263  int length = k_end+1;
1264  fftw_r2r_kind kinds[2] = {FFTW_REDFT01,FFTW_REDFT01};
1265 
1267  int k;
1268 
1269  double _Complex *work_ptr;
1270  const double _Complex *x_ptr;
1271 
1272  /* Check, if slow transformation should be used due to small bandwidth. */
1273  if (k_end < FPT_BREAK_EVEN)
1274  {
1275  /* Use NDSFT. */
1276  fpt_trafo_direct(set, m, x, y, k_end, flags);
1277  return;
1278  }
1279 
1280  X(next_power_of_2_exp)(k_end,&Nk,&tk);
1281  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1282  k_end_tilde = K_END_TILDE(k_end,Nk);
1283 
1284  /* Check if fast transform is activated. */
1285  if (set->flags & FPT_NO_FAST_ALGORITHM)
1286  return;
1287 
1288  if (flags & FPT_FUNCTION_VALUES)
1289  {
1290 #ifdef _OPENMP
1291  int nthreads = X(get_num_threads)();
1292 #pragma omp critical (nfft_omp_critical_fftw_plan)
1293 {
1294  fftw_plan_with_nthreads(nthreads);
1295 #endif
1296  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1297  (double*)set->work, NULL, 2, 1, kinds, 0U);
1298 #ifdef _OPENMP
1299 }
1300 #endif
1301  }
1302 
1303  /* Initialize working arrays. */
1304  memset(set->result,0U,2*Nk*sizeof(double _Complex));
1305 
1306  /* The first step. */
1307 
1308  /* Set the first 2*data->k_start coefficients to zero. */
1309  memset(set->work,0U,2*data->k_start*sizeof(double _Complex));
1310 
1311  work_ptr = &set->work[2*data->k_start];
1312  x_ptr = x;
1313 
1314  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1315  {
1316  *work_ptr++ = *x_ptr++;
1317  *work_ptr++ = K(0.0);
1318  }
1319 
1320  /* Set the last 2*(set->N-1-k_end_tilde) coefficients to zero. */
1321  memset(&set->work[2*(k_end_tilde+1)],0U,2*(Nk-1-k_end_tilde)*sizeof(double _Complex));
1322 
1323  /* If k_end == Nk, use three-term recurrence to map last coefficient x_{Nk} to
1324  * x_{Nk-1} and x_{Nk-2}. */
1325  if (k_end == Nk)
1326  {
1327  set->work[2*(Nk-2)] += data->gammaN[tk-2]*x[Nk-data->k_start];
1328  set->work[2*(Nk-1)] += data->betaN[tk-2]*x[Nk-data->k_start];
1329  set->work[2*(Nk-1)+1] = data->alphaN[tk-2]*x[Nk-data->k_start];
1330  }
1331 
1332  /* Compute the remaining steps. */
1333  plength = 4;
1334  for (tau = 1; tau < tk; tau++)
1335  {
1336  /* Compute first l. */
1337  firstl = FIRST_L(k_start_tilde,plength);
1338  /* Compute last l. */
1339  lastl = LAST_L(k_end_tilde,plength);
1340 
1341  /* Compute the multiplication steps. */
1342  for (l = firstl; l <= lastl; l++)
1343  {
1344  /* Copy vectors to multiply into working arrays zero-padded to twice the length. */
1345  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+2)]),(plength/2)*sizeof(double _Complex));
1346  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+3)]),(plength/2)*sizeof(double _Complex));
1347  memset(&set->vec3[plength/2],0U,(plength/2)*sizeof(double _Complex));
1348  memset(&set->vec4[plength/2],0U,(plength/2)*sizeof(double _Complex));
1349 
1350  /* Copy coefficients into first half. */
1351  memcpy(&(set->work[(plength/2)*(4*l+2)]),&(set->work[(plength/2)*(4*l+1)]),(plength/2)*sizeof(double _Complex));
1352  memset(&(set->work[(plength/2)*(4*l+1)]),0U,(plength/2)*sizeof(double _Complex));
1353  memset(&(set->work[(plength/2)*(4*l+3)]),0U,(plength/2)*sizeof(double _Complex));
1354 
1355  /* Get matrix U_{n,tau,l} */
1356  step = &(data->steps[tau][l]);
1357 
1358  /* Check if step is stable. */
1359  if (step->stable)
1360  {
1361  /* Check, if we should do a symmetrizised step. */
1362  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1363  {
1364  /*for (k = 0; k < plength; k++)
1365  {
1366  fprintf(stderr,"fpt_trafo: a11 = %le, a12 = %le, a21 = %le, a22 = %le\n",
1367  step->a11[0][k],step->a12[0][k],step->a21[0][k],step->a22[0][k]);
1368  }*/
1369  /* Multiply third and fourth polynomial with matrix U. */
1370  //fprintf(stderr,"\nhallo\n");
1371  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0],
1372  step->a12[0], step->a21[0], step->a22[0], step->g[0], tau, set);
1373  }
1374  else
1375  {
1376  /* Multiply third and fourth polynomial with matrix U. */
1377  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1378  step->a21[0], step->a22[0], step->g[0], tau, set);
1379  }
1380 
1381  if (step->g[0] != 0.0)
1382  {
1383  for (k = 0; k < plength; k++)
1384  {
1385  set->work[plength*2*l+k] += set->vec3[k];
1386  }
1387  }
1388  for (k = 0; k < plength; k++)
1389  {
1390  set->work[plength*(2*l+1)+k] += set->vec4[k];
1391  }
1392  }
1393  else
1394  {
1395  /* Stabilize. */
1396 
1397  /* The lengh of the polynomials */
1398  plength_stab = step->Ns;
1399  t_stab = step->ts;
1400 
1401  /*---------*/
1402  /*fprintf(stderr,"\nfpt_trafo: stabilizing at tau = %d, l = %d.\n",tau,l);
1403  fprintf(stderr,"\nfpt_trafo: plength_stab = %d.\n",plength_stab);
1404  fprintf(stderr,"\nfpt_trafo: tk = %d.\n",tk);
1405  fprintf(stderr,"\nfpt_trafo: index = %d.\n",tk-tau-1);*/
1406  /*---------*/
1407 
1408  /* Set rest of vectors explicitely to zero */
1409  /*fprintf(stderr,"fpt_trafo: stabilizing: plength = %d, plength_stab = %d\n",
1410  plength, plength_stab);*/
1411  memset(&set->vec3[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1412  memset(&set->vec4[plength/2],0U,(plength_stab-plength/2)*sizeof(double _Complex));
1413 
1414  /* Multiply third and fourth polynomial with matrix U. */
1415  /* Check for symmetry. */
1416  if (set->flags & FPT_AL_SYMMETRY)
1417  {
1418  if (m <= 1)
1419  {
1420  fpt_do_step_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1421  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1422  }
1423  else if (m%2 == 0)
1424  {
1425  /*fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1426  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1427  fpt_do_step_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1428  step->a21[0], step->a22[0],
1429  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1430  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1431  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1432  }
1433  else
1434  {
1435  /*fpt_do_step_symmetric_l(set->vec3, set->vec4, step->a11[0], step->a12[0],
1436  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1437  fpt_do_step_symmetric_l(set->vec3, set->vec4,
1438  step->a11[0], step->a12[0],
1439  step->a21[0],
1440  step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1441  /*fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1442  step->a21[0], step->a22[0], step->gamma[0], t_stab-1, set);*/
1443  }
1444  }
1445  else
1446  {
1447  fpt_do_step(set->vec3, set->vec4, step->a11[0], step->a12[0],
1448  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1449  }
1450 
1451  if (step->g[0] != 0.0)
1452  {
1453  for (k = 0; k < plength_stab; k++)
1454  {
1455  set->result[k] += set->vec3[k];
1456  }
1457  }
1458 
1459  for (k = 0; k < plength_stab; k++)
1460  {
1461  set->result[Nk+k] += set->vec4[k];
1462  }
1463  }
1464  }
1465  /* Double length of polynomials. */
1466  plength = plength<<1;
1467 
1468  /*--------*/
1469  /*for (k = 0; k < 2*Nk; k++)
1470  {
1471  fprintf(stderr,"work[%2d] = %le + I*%le\tresult[%2d] = %le + I*%le\n",
1472  k,creal(set->work[k]),cimag(set->work[k]),k,creal(set->result[k]),
1473  cimag(set->result[k]));
1474  }*/
1475  /*--------*/
1476  }
1477 
1478  /* Add the resulting cascade coeffcients to the coeffcients accumulated from
1479  * the stabilization steps. */
1480  for (k = 0; k < 2*Nk; k++)
1481  {
1482  set->result[k] += set->work[k];
1483  }
1484 
1485  /* The last step. Compute the Chebyshev coeffcients c_k^n from the
1486  * polynomials in front of P_0^n and P_1^n. */
1487  y[0] = data->gamma_m1*(set->result[0] + data->beta_0*set->result[Nk] +
1488  data->alpha_0*set->result[Nk+1]*0.5);
1489  y[1] = data->gamma_m1*(set->result[1] + data->beta_0*set->result[Nk+1]+
1490  data->alpha_0*(set->result[Nk]+set->result[Nk+2]*0.5));
1491  y[k_end-1] = data->gamma_m1*(set->result[k_end-1] +
1492  data->beta_0*set->result[Nk+k_end-1] +
1493  data->alpha_0*set->result[Nk+k_end-2]*0.5);
1494  y[k_end] = data->gamma_m1*(0.5*data->alpha_0*set->result[Nk+k_end-1]);
1495  for (k = 2; k <= k_end-2; k++)
1496  {
1497  y[k] = data->gamma_m1*(set->result[k] + data->beta_0*set->result[Nk+k] +
1498  data->alpha_0*0.5*(set->result[Nk+k-1]+set->result[Nk+k+1]));
1499  }
1500 
1501  if (flags & FPT_FUNCTION_VALUES)
1502  {
1503  y[0] *= 2.0;
1504  fftw_execute_r2r(plan,(double*)y,(double*)y);
1505 #ifdef _OPENMP
1506  #pragma omp critical (nfft_omp_critical_fftw_plan)
1507 #endif
1508  fftw_destroy_plan(plan);
1509  for (k = 0; k <= k_end; k++)
1510  {
1511  y[k] *= 0.5;
1512  }
1513  }
1514 }
1515 
1516 void fpt_transposed_direct(fpt_set set, const int m, double _Complex *x,
1517  double _Complex *y, const int k_end, const unsigned int flags)
1518 {
1519  int j;
1520  fpt_data *data = &(set->dpt[m]);
1521  int Nk;
1522  int tk;
1523  double norm;
1524 
1525  X(next_power_of_2_exp)(k_end+1,&Nk,&tk);
1526  norm = 2.0/(Nk<<1);
1527 
1528  if (set->flags & FPT_NO_DIRECT_ALGORITHM)
1529  {
1530  return;
1531  }
1532 
1533  if (flags & FPT_FUNCTION_VALUES)
1534  {
1535  for (j = 0; j <= k_end; j++)
1536  {
1537  set->xc_slow[j] = cos((KPI*(j+0.5))/(k_end+1));
1538  }
1539 
1540  eval_sum_clenshaw_transposed(k_end, k_end, set->result, set->xc_slow,
1541  y, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1542  data->gamma_m1);
1543 
1544  memcpy(x,&set->result[data->k_start],(k_end-data->k_start+1)*
1545  sizeof(double _Complex));
1546  }
1547  else
1548  {
1549  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1550  memset(&set->result[k_end+1],0U,(Nk-k_end-1)*sizeof(double _Complex));
1551 
1552  for (j = 0; j < Nk; j++)
1553  {
1554  set->result[j] *= norm;
1555  }
1556 
1557  fftw_execute_r2r(set->plans_dct3[tk-2],(double*)set->result,
1558  (double*)set->result);
1559 
1560  eval_sum_clenshaw_transposed(k_end, Nk-1, set->temp, set->xcvecs[tk-2],
1561  set->result, set->work, &data->_alpha[1], &data->_beta[1], &data->_gamma[1],
1562  data->gamma_m1);
1563 
1564  memcpy(x,&set->temp[data->k_start],(k_end-data->k_start+1)*sizeof(double _Complex));
1565  }
1566 }
1567 
1568 void fpt_transposed(fpt_set set, const int m, double _Complex *x,
1569  double _Complex *y, const int k_end, const unsigned int flags)
1570 {
1571  /* Get transformation data. */
1572  fpt_data *data = &(set->dpt[m]);
1574  int Nk;
1576  int tk;
1578  int k_start_tilde;
1580  int k_end_tilde;
1581 
1583  int tau;
1585  int firstl;
1587  int lastl;
1589  int l;
1591  int plength;
1593  int plength_stab;
1595  fpt_step *step;
1597  fftw_plan plan;
1598  int length = k_end+1;
1599  fftw_r2r_kind kinds[2] = {FFTW_REDFT10,FFTW_REDFT10};
1601  int k;
1602  int t_stab;
1603 
1604  /* Check, if slow transformation should be used due to small bandwidth. */
1605  if (k_end < FPT_BREAK_EVEN)
1606  {
1607  /* Use NDSFT. */
1608  fpt_transposed_direct(set, m, x, y, k_end, flags);
1609  return;
1610  }
1611 
1612  X(next_power_of_2_exp)(k_end,&Nk,&tk);
1613  k_start_tilde = K_START_TILDE(data->k_start,Nk);
1614  k_end_tilde = K_END_TILDE(k_end,Nk);
1615 
1616  /* Check if fast transform is activated. */
1617  if (set->flags & FPT_NO_FAST_ALGORITHM)
1618  {
1619  return;
1620  }
1621 
1622  if (flags & FPT_FUNCTION_VALUES)
1623  {
1624 #ifdef _OPENMP
1625  int nthreads = X(get_num_threads)();
1626 #pragma omp critical (nfft_omp_critical_fftw_plan)
1627 {
1628  fftw_plan_with_nthreads(nthreads);
1629 #endif
1630  plan = fftw_plan_many_r2r(1, &length, 2, (double*)set->work, NULL, 2, 1,
1631  (double*)set->work, NULL, 2, 1, kinds, 0U);
1632 #ifdef _OPENMP
1633 }
1634 #endif
1635  fftw_execute_r2r(plan,(double*)y,(double*)set->result);
1636 #ifdef _OPENMP
1637  #pragma omp critical (nfft_omp_critical_fftw_plan)
1638 #endif
1639  fftw_destroy_plan(plan);
1640  for (k = 0; k <= k_end; k++)
1641  {
1642  set->result[k] *= 0.5;
1643  }
1644  }
1645  else
1646  {
1647  memcpy(set->result,y,(k_end+1)*sizeof(double _Complex));
1648  }
1649 
1650  /* Initialize working arrays. */
1651  memset(set->work,0U,2*Nk*sizeof(double _Complex));
1652 
1653  /* The last step is now the first step. */
1654  for (k = 0; k <= k_end; k++)
1655  {
1656  set->work[k] = data->gamma_m1*set->result[k];
1657  }
1658  //memset(&set->work[k_end+1],0U,(Nk+1-k_end)*sizeof(double _Complex));
1659 
1660  set->work[Nk] = data->gamma_m1*(data->beta_0*set->result[0] +
1661  data->alpha_0*set->result[1]);
1662  for (k = 1; k < k_end; k++)
1663  {
1664  set->work[Nk+k] = data->gamma_m1*(data->beta_0*set->result[k] +
1665  data->alpha_0*0.5*(set->result[k-1]+set->result[k+1]));
1666  }
1667  if (k_end<Nk)
1668  {
1669  memset(&set->work[k_end],0U,(Nk-k_end)*sizeof(double _Complex));
1670  }
1671 
1673  memcpy(set->result,set->work,2*Nk*sizeof(double _Complex));
1674 
1675  /* Compute the remaining steps. */
1676  plength = Nk;
1677  for (tau = tk-1; tau >= 1; tau--)
1678  {
1679  /* Compute first l. */
1680  firstl = FIRST_L(k_start_tilde,plength);
1681  /* Compute last l. */
1682  lastl = LAST_L(k_end_tilde,plength);
1683 
1684  /* Compute the multiplication steps. */
1685  for (l = firstl; l <= lastl; l++)
1686  {
1687  /* Initialize second half of coefficient arrays with zeros. */
1688  memcpy(set->vec3,&(set->work[(plength/2)*(4*l+0)]),plength*sizeof(double _Complex));
1689  memcpy(set->vec4,&(set->work[(plength/2)*(4*l+2)]),plength*sizeof(double _Complex));
1690 
1691  memcpy(&set->work[(plength/2)*(4*l+1)],&(set->work[(plength/2)*(4*l+2)]),
1692  (plength/2)*sizeof(double _Complex));
1693 
1694  /* Get matrix U_{n,tau,l} */
1695  step = &(data->steps[tau][l]);
1696 
1697  /* Check if step is stable. */
1698  if (step->stable)
1699  {
1700  if (set->flags & FPT_AL_SYMMETRY && IS_SYMMETRIC(l,m,plength))
1701  {
1702  /* Multiply third and fourth polynomial with matrix U. */
1703  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1704  step->a21[0], step->a22[0], step->g[0], tau, set);
1705  }
1706  else
1707  {
1708  /* Multiply third and fourth polynomial with matrix U. */
1709  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1710  step->a21[0], step->a22[0], step->g[0], tau, set);
1711  }
1712  memcpy(&(set->vec3[plength/2]), set->vec4,(plength/2)*sizeof(double _Complex));
1713 
1714  for (k = 0; k < plength; k++)
1715  {
1716  set->work[plength*(4*l+2)/2+k] = set->vec3[k];
1717  }
1718  }
1719  else
1720  {
1721  /* Stabilize. */
1722  plength_stab = step->Ns;
1723  t_stab = step->ts;
1724 
1725  memcpy(set->vec3,set->result,plength_stab*sizeof(double _Complex));
1726  memcpy(set->vec4,&(set->result[Nk]),plength_stab*sizeof(double _Complex));
1727 
1728  /* Multiply third and fourth polynomial with matrix U. */
1729  if (set->flags & FPT_AL_SYMMETRY)
1730  {
1731  if (m <= 1)
1732  {
1733  fpt_do_step_t_symmetric(set->vec3, set->vec4, step->a11[0], step->a12[0],
1734  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1735  }
1736  else if (m%2 == 0)
1737  {
1738  fpt_do_step_t_symmetric_u(set->vec3, set->vec4, step->a11[0], step->a12[0],
1739  set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1740  }
1741  else
1742  {
1743  fpt_do_step_t_symmetric_l(set->vec3, set->vec4,
1744  step->a21[0], step->a22[0], set->xcvecs[t_stab-2], step->g[0], t_stab-1, set);
1745  }
1746  }
1747  else
1748  {
1749  fpt_do_step_t(set->vec3, set->vec4, step->a11[0], step->a12[0],
1750  step->a21[0], step->a22[0], step->g[0], t_stab-1, set);
1751  }
1752 
1753  memcpy(&(set->vec3[plength/2]),set->vec4,(plength/2)*sizeof(double _Complex));
1754 
1755  for (k = 0; k < plength; k++)
1756  {
1757  set->work[(plength/2)*(4*l+2)+k] = set->vec3[k];
1758  }
1759  }
1760  }
1761  /* Half the length of polynomial arrays. */
1762  plength = plength>>1;
1763  }
1764 
1765  /* First step */
1766  for (k = 0; k <= k_end_tilde-data->k_start; k++)
1767  {
1768  x[k] = set->work[2*(data->k_start+k)];
1769  }
1770  if (k_end == Nk)
1771  {
1772  x[Nk-data->k_start] =
1773  data->gammaN[tk-2]*set->work[2*(Nk-2)]
1774  + data->betaN[tk-2] *set->work[2*(Nk-1)]
1775  + data->alphaN[tk-2]*set->work[2*(Nk-1)+1];
1776  }
1777 }
1778 
1779 void fpt_finalize(fpt_set set)
1780 {
1781  int tau;
1782  int l;
1783  int m;
1784  fpt_data *data;
1785  int k_start_tilde;
1786  int N_tilde;
1787  int firstl, lastl;
1788  int plength;
1789  const int M = set->M;
1790 
1791  /* TODO Clean up DPT transform data structures. */
1792  for (m = 0; m < M; m++)
1793  {
1794  /* Check if precomputed. */
1795  data = &set->dpt[m];
1796  if (data->steps != (fpt_step**)NULL)
1797  {
1798  nfft_free(data->alphaN);
1799  nfft_free(data->betaN);
1800  nfft_free(data->gammaN);
1801  data->alphaN = NULL;
1802  data->betaN = NULL;
1803  data->gammaN = NULL;
1804 
1805  /* Free precomputed data. */
1806  k_start_tilde = K_START_TILDE(data->k_start,X(next_power_of_2)(data->k_start)
1807  /*set->N*/);
1808  N_tilde = N_TILDE(set->N);
1809  plength = 4;
1810  for (tau = 1; tau < set->t; tau++)
1811  {
1812  /* Compute first l. */
1813  firstl = FIRST_L(k_start_tilde,plength);
1814  /* Compute last l. */
1815  lastl = LAST_L(N_tilde,plength);
1816 
1817  /* For l = 0,...2^{t-tau-1}-1 compute the matrices U_{n,tau,l}. */
1818  for (l = firstl; l <= lastl; l++)
1819  {
1820  /* Free components. */
1821  nfft_free(data->steps[tau][l].a11[0]);
1822  nfft_free(data->steps[tau][l].a12[0]);
1823  nfft_free(data->steps[tau][l].a21[0]);
1824  nfft_free(data->steps[tau][l].a22[0]);
1825  data->steps[tau][l].a11[0] = NULL;
1826  data->steps[tau][l].a12[0] = NULL;
1827  data->steps[tau][l].a21[0] = NULL;
1828  data->steps[tau][l].a22[0] = NULL;
1829  /* Free components. */
1830  nfft_free(data->steps[tau][l].a11);
1831  nfft_free(data->steps[tau][l].a12);
1832  nfft_free(data->steps[tau][l].a21);
1833  nfft_free(data->steps[tau][l].a22);
1834  nfft_free(data->steps[tau][l].g);
1835  data->steps[tau][l].a11 = NULL;
1836  data->steps[tau][l].a12 = NULL;
1837  data->steps[tau][l].a21 = NULL;
1838  data->steps[tau][l].a22 = NULL;
1839  data->steps[tau][l].g = NULL;
1840  }
1841  /* Free pointers for current level. */
1842  nfft_free(data->steps[tau]);
1843  data->steps[tau] = NULL;
1844  /* Double length of polynomials. */
1845  plength = plength<<1;
1846  }
1847  /* Free steps. */
1848  nfft_free(data->steps);
1849  data->steps = NULL;
1850  }
1851 
1852  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1853  {
1854  /* Check, if recurrence coefficients must be copied. */
1855  //fprintf(stderr,"\nfpt_finalize: %d\n",set->flags & FPT_PERSISTENT_DATA);
1856  if (!(set->flags & FPT_PERSISTENT_DATA))
1857  {
1858  nfft_free(data->_alpha);
1859  nfft_free(data->_beta);
1860  nfft_free(data->_gamma);
1861  }
1862  data->_alpha = NULL;
1863  data->_beta = NULL;
1864  data->_gamma = NULL;
1865  }
1866  }
1867 
1868  /* Delete array of DPT transform data. */
1869  nfft_free(set->dpt);
1870  set->dpt = NULL;
1871 
1872  for (tau = 1; tau < set->t+1; tau++)
1873  {
1874  nfft_free(set->xcvecs[tau-1]);
1875  set->xcvecs[tau-1] = NULL;
1876  }
1877  nfft_free(set->xcvecs);
1878  set->xcvecs = NULL;
1879 
1880  /* Free auxilliary arrays. */
1881  nfft_free(set->work);
1882  nfft_free(set->result);
1883 
1884  /* Check if fast transform is activated. */
1885  if (!(set->flags & FPT_NO_FAST_ALGORITHM))
1886  {
1887  /* Free auxilliary arrays. */
1888  nfft_free(set->vec3);
1889  nfft_free(set->vec4);
1890  nfft_free(set->z);
1891  set->work = NULL;
1892  set->result = NULL;
1893  set->vec3 = NULL;
1894  set->vec4 = NULL;
1895  set->z = NULL;
1896 
1897  /* Free FFTW plans. */
1898  for(tau = 0; tau < set->t/*-1*/; tau++)
1899  {
1900 #ifdef _OPENMP
1901 #pragma omp critical (nfft_omp_critical_fftw_plan)
1902 #endif
1903 {
1904  fftw_destroy_plan(set->plans_dct3[tau]);
1905  fftw_destroy_plan(set->plans_dct2[tau]);
1906 }
1907  set->plans_dct3[tau] = NULL;
1908  set->plans_dct2[tau] = NULL;
1909  }
1910 
1911  nfft_free(set->plans_dct3);
1912  nfft_free(set->plans_dct2);
1913  set->plans_dct3 = NULL;
1914  set->plans_dct2 = NULL;
1915  }
1916 
1917  if (!(set->flags & FPT_NO_DIRECT_ALGORITHM))
1918  {
1919  /* Delete arrays of Chebyshev nodes. */
1920  nfft_free(set->xc_slow);
1921  set->xc_slow = NULL;
1922  nfft_free(set->temp);
1923  set->temp = NULL;
1924  }
1925 
1926  /* Free DPT set structure. */
1927  nfft_free(set);
1928 }
Holds data for a single multiplication step in the cascade summation.
Definition: fpt.c:63
double * xc
Array for Chebychev-nodes.
Definition: fpt.c:107
bool stable
Indicates if the values contained represent a fast or a slow stabilized step.
Definition: fpt.c:65
double * alphaN
TODO Add comment here.
Definition: fpt.c:81
int M
The number of DPT transforms.
Definition: fpt.c:99
double * _alpha
< TODO Add comment here.
Definition: fpt.c:88
int * lengths
Transform lengths for fftw library.
Definition: fpt.c:123
double alpha_0
TODO Add comment here.
Definition: fpt.c:84
int t
The exponent of N.
Definition: fpt.c:102
Holds data for a set of cascade summations.
Definition: fpt.c:96
#define LAST_L(x, y)
Index of last block of four functions at level.
Definition: fpt.c:52
#define K_START_TILDE(x, y)
Minimum degree at top of a cascade.
Definition: fpt.c:43
fftw_r2r_kind * kinds
Transform kinds for fftw library.
Definition: fpt.c:118
void nfft_free(void *p)
fpt_data * dpt
The DPT transform data.
Definition: fpt.c:103
fftw_plan * plans_dct3
Transform plans for the fftw library.
Definition: fpt.c:114
double * gammaN
TODO Add comment here.
Definition: fpt.c:83
#define X(name)
Include header for C99 complex datatype.
Definition: fastsum.h:53
int N
The transform length.
Definition: fpt.c:100
Holds data for a single cascade summation.
Definition: fpt.c:77
fftw_plan * plans_dct2
Transform plans for the fftw library.
Definition: fpt.c:116
#define UNUSED(x)
Dummy use of unused parameters to silence compiler warnings.
Definition: infft.h:1383
struct fpt_set_s_ fpt_set_s
Holds data for a set of cascade summations.
int Ns
TODO Add comment here.
Definition: fpt.c:68
double * _beta
TODO Add comment here.
Definition: fpt.c:89
void * nfft_malloc(size_t n)
double beta_0
TODO Add comment here.
Definition: fpt.c:85
static void eval_sum_clenshaw_transposed(int N, int M, double _Complex *a, double *x, double _Complex *y, double _Complex *temp, double *alpha, double *beta, double *gam, double lambda)
Clenshaw algorithm.
Definition: fpt.c:714
double * _gamma
TODO Add comment here.
Definition: fpt.c:90
double ** a22
The matrix components.
Definition: fpt.c:70
fpt_step ** steps
The cascade summation steps.
Definition: fpt.c:79
double gamma_m1
TODO Add comment here.
Definition: fpt.c:86
#define FIRST_L(x, y)
Index of first block of four functions at level.
Definition: fpt.c:49
int k_start
TODO Add comment here.
Definition: fpt.c:80
int flags
The flags.
Definition: fpt.c:98
int ts
TODO Add comment here.
Definition: fpt.c:69
double * betaN
TODO Add comment here.
Definition: fpt.c:82
struct fpt_data_ fpt_data
Holds data for a single cascade summation.
struct fpt_step_ fpt_step
Holds data for a single multiplication step in the cascade summation.
#define K_END_TILDE(x, y)
Maximum degree at top of a cascade.
Definition: fpt.c:46
double ** xcvecs
Array of pointers to arrays containing the Chebyshev nodes.
Definition: fpt.c:104
fftw_r2r_kind * kindsr
Transform kinds for fftw library.
Definition: fpt.c:120