NFFT 3.5.3alpha
nfsft_benchomp_detail.c
1/*
2 * Copyright (c) 2002, 2017 Jens Keiner, Stefan Kunis, Daniel Potts
3 *
4 * This program is free software; you can redistribute it and/or modify it under
5 * the terms of the GNU General Public License as published by the Free Software
6 * Foundation; either version 2 of the License, or (at your option) any later
7 * version.
8 *
9 * This program is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12 * details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 51
16 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 */
18
19#include <stdio.h>
20#include <math.h>
21#include <string.h>
22#include <stdlib.h>
23#include <complex.h>
24
25#include "nfft3.h"
26#include "infft.h"
27#ifdef _OPENMP
28#include <omp.h>
29#endif
30
31void bench_openmp_readfile(FILE *infile, int *trafo_adjoint, int *N, int *M, double **x, C **f_hat, C **f)
32{
33 double re,im;
34 int k, n, j, t;
35 nfsft_plan plan;
36
37 fscanf(infile, "%d %d %d", trafo_adjoint, N, M);
38 *x = (double *)nfft_malloc(2*(*M)*sizeof(double));
39 *f_hat = (C*)nfft_malloc((2*(*N)+2) * (2*(*N)+2) * sizeof(C));
40 *f = (C*)nfft_malloc((*M)*sizeof(C));
41
42 memset(*f_hat,0U,(2*(*N)+2) * (2*(*N)+2) * sizeof(C));
43 memset(*f,0U,(*M)*sizeof(C));
44
45#ifdef _OPENMP
46 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
47#else
48 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
49#endif
50
51 nfsft_init_guru(&plan, *N, *M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
54
55#ifdef _OPENMP
56 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
57#else
58 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
59#endif
60
61 for (j=0; j < *M; j++)
62 for (t=0; t < 2; t++)
63 fscanf(infile, "%lg", (*x)+2*j+t);
64
65 if (trafo_adjoint==0)
66 {
67 for (k = 0; k <= *N; k++)
68 for (n = -k; n <= k; n++)
69 {
70 fscanf(infile, "%lg %lg", &re, &im);
71 (*f_hat)[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
72 }
73 }
74 else
75 {
76 for (j=0; j < *M; j++)
77 {
78 fscanf(infile, "%lg %lg", &re, &im);
79 (*f)[j] = re + _Complex_I * im;
80 }
81 }
82
83 nfsft_finalize(&plan);
84}
85
86void bench_openmp(int trafo_adjoint, int N, int M, double *x, C *f_hat, C *f, int m, int nfsft_flags, int psi_flags)
87{
88 nfsft_plan plan;
89 int k, n;
90// int N, M, trafo_adjoint;
91 int t, j;
92 ticks t0, t1;
93 double tt_total, tt_pre;
94
95// fscanf(infile, "%d %d %d", &trafo_adjoint, &N, &M);
96
97/*#ifdef _OPENMP
98 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_threads.plan");
99#else
100 fftw_import_wisdom_from_filename("nfsft_benchomp_detail_single.plan");
101#endif*/
102
103 /* precomputation (for fast polynomial transform) */
104// nfsft_precompute(N,1000.0,0U,0U);
105
106 /* Initialize transform plan using the guru interface. All input and output
107 * arrays are allocated by nfsft_init_guru(). Computations are performed with
108 * respect to L^2-normalized spherical harmonics Y_k^n. The array of spherical
109 * Fourier coefficients is preserved during transformations. The NFFT uses a
110 * cut-off parameter m = 6. See the NFFT 3 manual for details.
111 */
112 nfsft_init_guru(&plan, N, M, nfsft_flags | NFSFT_MALLOC_X | NFSFT_MALLOC_F |
114 PRE_PHI_HUT | psi_flags | FFTW_INIT | FFT_OUT_OF_PLACE, m);
115
116/*#ifdef _OPENMP
117 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_threads.plan");
118#else
119 fftw_export_wisdom_to_filename("nfsft_benchomp_detail_single.plan");
120#endif*/
121
122 for (j=0; j < plan.M_total; j++)
123 {
124 for (t=0; t < 2; t++)
125 // fscanf(infile, "%lg", plan.x+2*j+t);
126 plan.x[2*j+t] = x[2*j+t];
127 }
128
129 if (trafo_adjoint==0)
130 {
131 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
132 for (k = 0; k <= plan.N; k++)
133 for (n = -k; n <= k; n++)
134 {
135// fscanf(infile, "%lg %lg", &re, &im);
136// plan.f_hat[NFSFT_INDEX(k,n,&plan)] = re + _Complex_I * im;
137 plan.f_hat[NFSFT_INDEX(k,n,&plan)] = f_hat[NFSFT_INDEX(k,n,&plan)];
138 }
139 }
140 else
141 {
142 for (j=0; j < plan.M_total; j++)
143 {
144// fscanf(infile, "%lg %lg", &re, &im);
145// plan.f[j] = re + _Complex_I * im;
146 plan.f[j] = f[j];
147 }
148
149 memset(plan.f_hat,0U,plan.N_total*sizeof(double _Complex));
150 }
151
152 t0 = getticks();
153 /* precomputation (for NFFT, node-dependent) */
154 nfsft_precompute_x(&plan);
155 t1 = getticks();
156 tt_pre = nfft_elapsed_seconds(t1,t0);
157
158 if (trafo_adjoint==0)
159 nfsft_trafo(&plan);
160 else
161 nfsft_adjoint(&plan);
162 t1 = getticks();
163 tt_total = nfft_elapsed_seconds(t1,t0);
164
165#ifndef MEASURE_TIME
166 plan.MEASURE_TIME_t[0] = 0.0;
167 plan.MEASURE_TIME_t[2] = 0.0;
168#endif
169
170#ifndef MEASURE_TIME_FFTW
171 plan.MEASURE_TIME_t[1] = 0.0;
172#endif
173
174 printf("%.6e %.6e %6e %.6e %.6e %.6e\n", tt_pre, plan.MEASURE_TIME_t[0], plan.MEASURE_TIME_t[1], plan.MEASURE_TIME_t[2], tt_total-tt_pre-plan.MEASURE_TIME_t[0]-plan.MEASURE_TIME_t[1]-plan.MEASURE_TIME_t[2], tt_total);
175
177 nfsft_finalize(&plan);
178}
179
180int main(int argc, char **argv)
181{
182 int m, nfsft_flags, psi_flags;
183 int nrepeat;
184 int trafo_adjoint, N, M, r;
185 double *x;
186 C *f_hat, *f;
187#ifdef _OPENMP
188 int nthreads;
189
190 if (argc != 6)
191 return 1;
192
193 nthreads = atoi(argv[5]);
194 fftw_init_threads();
195 omp_set_num_threads(nthreads);
196#else
197 if (argc != 5)
198 return 1;
199#endif
200
201 m = atoi(argv[1]);
202 nfsft_flags = atoi(argv[2]);
203 psi_flags = atoi(argv[3]);
204 nrepeat = atoi(argv[4]);
205
206 bench_openmp_readfile(stdin, &trafo_adjoint, &N, &M, &x, &f_hat, &f);
207
208 /* precomputation (for fast polynomial transform) */
209 nfsft_precompute(N,1000.0,0U,0U);
210
211 for (r = 0; r < nrepeat; r++)
212 bench_openmp(trafo_adjoint, N, M, x, f_hat, f, m, nfsft_flags, psi_flags);
213
214 return 0;
215}
#define FFT_OUT_OF_PLACE
Definition nfft3.h:196
#define FFTW_INIT
Definition nfft3.h:197
#define PRE_PHI_HUT
Definition nfft3.h:187
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
#define NFSFT_MALLOC_X
Definition nfft3.h:588
void nfsft_trafo(nfsft_plan *plan)
Definition nfsft.c:1068
#define NFSFT_NORMALIZED
Definition nfft3.h:585
void nfsft_adjoint(nfsft_plan *plan)
Definition nfsft.c:1316
#define NFSFT_INDEX(k, n, plan)
Definition nfft3.h:605
void nfsft_finalize(nfsft_plan *plan)
Definition nfsft.c:625
#define NFSFT_MALLOC_F_HAT
Definition nfft3.h:589
void nfsft_precompute(int N, double kappa, unsigned int nfsft_flags, unsigned int fpt_flags)
Definition nfsft.c:376
#define NFSFT_PRESERVE_F_HAT
Definition nfft3.h:591
#define NFSFT_MALLOC_F
Definition nfft3.h:590
Internal header file for auxiliary definitions and functions.
Header file for the nfft3 library.
void * nfft_malloc(size_t n)
data structure for an NFSFT (nonequispaced fast spherical Fourier transform) plan with double precisi...
Definition nfft3.h:581
NFFT_INT N_total
Total number of Fourier coefficients.
Definition nfft3.h:581
NFFT_INT M_total
Total number of samples.
Definition nfft3.h:581
double MEASURE_TIME_t[3]
Measured time for each step if MEASURE_TIME is set.
Definition nfft3.h:581
int N
the bandwidth
Definition nfft3.h:581
fftw_complex * f
Samples.
Definition nfft3.h:581
double * x
the nodes for ,
Definition nfft3.h:581
fftw_complex * f_hat
Fourier coefficients.
Definition nfft3.h:581