NFFT  3.3.0
nfsft_benchomp_detail.c
1 /*
2  * Copyright (c) 2002, 2015 Jens Keiner, Stefan Kunis, Daniel Potts
3  *
4  * This program is free software; you can redistribute it and/or modify it under
5  * the terms of the GNU General Public License as published by the Free Software
6  * Foundation; either version 2 of the License, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12  * details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program; if not, write to the Free Software Foundation, Inc., 51
16  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17  */
18 
19 /* $Id: simple_test.c 3372 2009-10-21 06:04:05Z skunis $ */
20 
21 #include <stdio.h>
22 #include <math.h>
23 #include <string.h>
24 #include <stdlib.h>
25 #include <complex.h>
26 
27 #include "nfft3.h"
28 #include "infft.h"
29 #ifdef _OPENMP
30 #include <omp.h>
31 #endif
32 
33 void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
34 {
35  double re,im;
36  int k, n, j, t;
37  nfsft_plan plan;
38 
39  fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
40  *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
41  *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
42  *f = (C*)nfft_malloc((*M)*sizeof(C));
43 
44  memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
45  memset(*f,0U,(*M)*sizeof(C));
46 
47 #ifdef _OPENMP
48  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
49 #else
50  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
51 #endif
52 
53  nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
54  NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
55  PRE_PHI_HUT | FFTW_INIT | FFT_OUT_OF_PLACE, 6);
56 
57 #ifdef _OPENMP
58  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
59 #else
60  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
61 #endif
62 
63  for (j=0; j < *M; j++)
64  for (t=0; t < 2; t++)
65  fscanf(infile, "%lg", (*x)+2*j+t);
66 
67  if (trafo_adjoint==0)
68  {
69  for (k = 0; k <= *N; k++)
70  for (n = -k; n <= k; n++)
71  {
72  fscanf(infile, "%lg %lg", &re, &im);
73  (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
74  }
75  }
76  else
77  {
78  for (j=0; j < *M; j++)
79  {
80  fscanf(infile, "%lg %lg", &re, &im);
81  (*f)[j] = re + _Complex_I * im;
82  }
83  }
84 
85  nfsft_finalize(&plan);
86 }
87 
88 void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
89 {
90  nfsft_plan plan;
91  int k, n;
92 // int N, M, trafo_adjoint;
93  int t, j;
94  ticks t0, t1;
95  double tt_total, tt_pre;
96 
97 // fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
98 
99 /*#ifdef _OPENMP
100  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
101 #else
102  fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
103 #endif*/
104 
105  /* precomputation (for fast polynomial transform) */
106 // nfsft_precompute(N,1000.0,0U,0U);
107 
108  /* Initialize transform plan using the guru interface. All input and output
109  * arrays are allocated by nfsft_init_guru(). Computations are performed with
110  * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
111  * Fourier coefficients is preserved during transformations. The NFFT uses a
112  * cut-off parameter m = 6. See the NFFT 3 manual for details.
113  */
114  nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
115  NFSFT_MALLOC_F_HAT | NFSFT_NORMALIZED | NFSFT_PRESERVE_F_HAT,
116  PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
117 
118 /*#ifdef _OPENMP
119  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
120 #else
121  fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
122 #endif*/
123 
124  for (j=0; j < plan.M_total; j++)
125  {
126  for (t=0; t < 2; t++)
127  // fscanf(infile, "%lg", plan.x+2*j+t);
128  plan.x[2*j+t] = x[2*j+t];
129  }
130 
131  if (trafo_adjoint==0)
132  {
133  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
134  for (k = 0; k <= plan.N; k++)
135  for (n = -k; n <= k; n++)
136  {
137 // fscanf(infile, "%lg %lg", &re, &im);
138 // plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
139  plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
140  }
141  }
142  else
143  {
144  for (j=0; j < plan.M_total; j++)
145  {
146 // fscanf(infile, "%lg %lg", &re, &im);
147 // plan.f[j] = re + _Complex_I * im;
148  plan.f[j] = f[j];
149  }
150 
151  memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
152  }
153 
154  t0 = getticks();
155  /* precomputation (for NFFT, node-dependent) */
156  nfsft_precompute_x(&plan);
157  t1 = getticks();
158  tt_pre = nfft_elapsed_seconds(t1,t0);
159 
160  if (trafo_adjoint==0)
161  nfsft_trafo(&plan);
162  else
163  nfsft_adjoint(&plan);
164  t1 = getticks();
165  tt_total = nfft_elapsed_seconds(t1,t0);
166 
167 #ifndef MEASURE_TIME
168  plan.MEASURE_TIME_t[0] = 0.0;
169  plan.MEASURE_TIME_t[2] = 0.0;
170 #endif
171 
172 #ifndef MEASURE_TIME_FFTW
173  plan.MEASURE_TIME_t[1] = 0.0;
174 #endif
175 
176  printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
177 
179  nfsft_finalize(&plan);
180 }
181 
182 int main(int argc, char **argv)
183 {
184  int m, nfsft_flags, psi_flags;
185  int nrepeat;
186  int trafo_adjoint, N, M, r;
187  double *x;
188  C *f_hat, *f;
189 #ifdef _OPENMP
190  int nthreads;
191 
192  if (argc != 6)
193  return 1;
194 
195  nthreads = atoi(argv[5]);
196  fftw_init_threads();
197  omp_set_num_threads(nthreads);
198 #else
199  if (argc != 5)
200  return 1;
201 #endif
202 
203  m = atoi(argv[1]);
204  nfsft_flags = atoi(argv[2]);
205  psi_flags = atoi(argv[3]);
206  nrepeat = atoi(argv[4]);
207 
208  bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
209 
210  /* precomputation (for fast polynomial transform) */
211  nfsft_precompute(N,1000.0,0U,0U);
212 
213  for (r = 0; r < nrepeat; r++)
214  bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
215 
216  return 0;
217 }
double * x
the nodes for ,
Definition: nfft3.h:576
fftw_complex * f_hat
Fourier coefficients.
Definition: nfft3.h:576
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
int N
the bandwidth
Definition: nfft3.h:576
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition: nfft3.h:576
NFFT_INT M_total
Total number of samples.
Definition: nfft3.h:576
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
Definition: nfft3.h:576
void * nfft_malloc(size_t n)
NFFT_INT N_total
Total number of Fourier coefficients.
Definition: nfft3.h:576
fftw_complex * f
Samples.
Definition: nfft3.h:576