Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
NE10_fft_float32.c
1 /*
2  * Copyright 2013-15 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_float32.c
45  */
46 
47 #include "NE10_types.h"
48 #include "NE10_macros.h"
49 #include "NE10_fft.h"
50 
51 static void ne10_mixed_radix_butterfly_float32_c (ne10_fft_cpx_float32_t * Fout,
53  ne10_int32_t * factors,
54  ne10_fft_cpx_float32_t * twiddles,
55  ne10_fft_cpx_float32_t * buffer)
56 {
57  ne10_int32_t fstride, mstride, N;
58  ne10_int32_t fstride1;
59  ne10_int32_t f_count, m_count;
60  ne10_int32_t stage_count;
61 
62  ne10_fft_cpx_float32_t scratch_in[8];
63  ne10_fft_cpx_float32_t scratch_out[8];
64  ne10_fft_cpx_float32_t scratch[16];
65  ne10_fft_cpx_float32_t scratch_tw[6];
66 
67  ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
68  ne10_fft_cpx_float32_t *Fout_ls = Fout;
70  ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
71  const ne10_float32_t TW_81 = 0.70710678;
72  const ne10_float32_t TW_81N = -0.70710678;
73 
74  // init fstride, mstride, N, tw
75  stage_count = factors[0];
76  fstride = factors[1];
77  mstride = factors[ (stage_count << 1) - 1 ];
78  N = factors[ stage_count << 1 ]; // radix
79  tw = twiddles;
80 
81  // the first stage
82  Fin1 = Fin;
83  Fout1 = Fout;
84  if (N == 2) // length of FFT is 2^n (n is odd)
85  {
86  // radix 8
87  N = fstride >> 1; // 1/4 of length of FFT
88  fstride1 = fstride >> 2;
89 
90  Fin1 = Fin;
91  for (f_count = 0; f_count < fstride1; f_count ++)
92  {
93  Fout1 = & Fout[ f_count * 8 ];
94 
95  scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
96  scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
97  scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
98  scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
99  scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
100  scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
101  scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
102  scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
103  scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
104  scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
105  scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
106  scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
107  scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
108  scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
109  scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
110  scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
111 
112  // radix 4 butterfly without twiddles
113  scratch[0] = scratch_in[0];
114  scratch[1] = scratch_in[1];
115 
116  scratch[2] = scratch_in[2];
117  scratch[3].r = (scratch_in[3].r + scratch_in[3].i) * TW_81;
118  scratch[3].i = (scratch_in[3].i - scratch_in[3].r) * TW_81;
119 
120  scratch[4] = scratch_in[4];
121  scratch[5].r = scratch_in[5].i;
122  scratch[5].i = -scratch_in[5].r;
123 
124  scratch[6].r = scratch_in[6].r;
125  scratch[6].i = scratch_in[6].i;
126  scratch[7].r = (scratch_in[7].r - scratch_in[7].i) * TW_81N;
127  scratch[7].i = (scratch_in[7].i + scratch_in[7].r) * TW_81N;
128 
129  // radix 2 butterfly
130  scratch[8].r = scratch[0].r + scratch[4].r;
131  scratch[8].i = scratch[0].i + scratch[4].i;
132  scratch[9].r = scratch[1].r + scratch[5].r;
133  scratch[9].i = scratch[1].i + scratch[5].i;
134 
135  scratch[10].r = scratch[0].r - scratch[4].r;
136  scratch[10].i = scratch[0].i - scratch[4].i;
137  scratch[11].r = scratch[1].r - scratch[5].r;
138  scratch[11].i = scratch[1].i - scratch[5].i;
139 
140  // radix 2 butterfly
141  scratch[12].r = scratch[2].r + scratch[6].r;
142  scratch[12].i = scratch[2].i + scratch[6].i;
143  scratch[13].r = scratch[3].r + scratch[7].r;
144  scratch[13].i = scratch[3].i + scratch[7].i;
145 
146  scratch[14].r = scratch[2].r - scratch[6].r;
147  scratch[14].i = scratch[2].i - scratch[6].i;
148  scratch[15].r = scratch[3].r - scratch[7].r;
149  scratch[15].i = scratch[3].i - scratch[7].i;
150 
151  // third result
152  scratch_out[4].r = scratch[8].r - scratch[12].r;
153  scratch_out[4].i = scratch[8].i - scratch[12].i;
154  scratch_out[5].r = scratch[9].r - scratch[13].r;
155  scratch_out[5].i = scratch[9].i - scratch[13].i;
156 
157  // first result
158  scratch_out[0].r = scratch[8].r + scratch[12].r;
159  scratch_out[0].i = scratch[8].i + scratch[12].i;
160  scratch_out[1].r = scratch[9].r + scratch[13].r;
161  scratch_out[1].i = scratch[9].i + scratch[13].i;
162 
163  // second result
164  scratch_out[2].r = scratch[10].r + scratch[14].i;
165  scratch_out[2].i = scratch[10].i - scratch[14].r;
166  scratch_out[3].r = scratch[11].r + scratch[15].i;
167  scratch_out[3].i = scratch[11].i - scratch[15].r;
168 
169  // forth result
170  scratch_out[6].r = scratch[10].r - scratch[14].i;
171  scratch_out[6].i = scratch[10].i + scratch[14].r;
172  scratch_out[7].r = scratch[11].r - scratch[15].i;
173  scratch_out[7].i = scratch[11].i + scratch[15].r;
174 
175  // store
176  Fout1[0] = scratch_out[0];
177  Fout1[1] = scratch_out[1];
178  Fout1[2] = scratch_out[2];
179  Fout1[3] = scratch_out[3];
180  Fout1[4] = scratch_out[4];
181  Fout1[5] = scratch_out[5];
182  Fout1[6] = scratch_out[6];
183  Fout1[7] = scratch_out[7];
184 
185  Fin1 += 1;
186  } // f_count
187  tw += 6;
188  mstride <<= 2;
189  fstride >>= 4;
190  stage_count -= 2;
191 
192  // swap
193  Ftmp = buffer;
194  buffer = Fout;
195  Fout = Ftmp;
196  }
197  else if (N == 4) // length of FFT is 2^n (n is even)
198  {
199  //fstride is nfft>>2
200  for (f_count = fstride; f_count ; f_count --)
201  {
202  // load
203  scratch_in[0] = *Fin1;
204  Fin2 = Fin1 + fstride;
205  scratch_in[1] = *Fin2;
206  Fin2 = Fin2 + fstride;
207  scratch_in[2] = *Fin2;
208  Fin2 = Fin2 + fstride;
209  scratch_in[3] = *Fin2;
210 
211  // radix 4 butterfly without twiddles
212 
213  // radix 2 butterfly
214  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
215  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
216 
217  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
218  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
219 
220  // radix 2 butterfly
221  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
222  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
223 
224  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
225  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
226 
227  // third result
228  scratch_out[2].r = scratch[0].r - scratch[2].r;
229  scratch_out[2].i = scratch[0].i - scratch[2].i;
230 
231  // first result
232  scratch_out[0].r = scratch[0].r + scratch[2].r;
233  scratch_out[0].i = scratch[0].i + scratch[2].i;
234 
235  // second result
236  scratch_out[1].r = scratch[1].r + scratch[3].i;
237  scratch_out[1].i = scratch[1].i - scratch[3].r;
238 
239  // forth result
240  scratch_out[3].r = scratch[1].r - scratch[3].i;
241  scratch_out[3].i = scratch[1].i + scratch[3].r;
242 
243  // store
244  * Fout1 ++ = scratch_out[0];
245  * Fout1 ++ = scratch_out[1];
246  * Fout1 ++ = scratch_out[2];
247  * Fout1 ++ = scratch_out[3];
248 
249  Fin1++;
250  } // f_count
251 
252  N = fstride; // 1/4 of length of FFT
253 
254  // update address for other stages
255  stage_count--;
256  fstride >>= 2;
257 
258  // swap
259  Ftmp = buffer;
260  buffer = Fout;
261  Fout = Ftmp;
262  // end of first stage
263  }
264 
265 
266  // others but the last one
267  for (; stage_count > 1 ; stage_count--)
268  {
269  Fin1 = buffer;
270  for (f_count = 0; f_count < fstride; f_count ++)
271  {
272  Fout1 = & Fout[ f_count * mstride << 2 ];
273  tw1 = tw;
274  for (m_count = mstride; m_count ; m_count --)
275  {
276  // load
277  scratch_tw[0] = *tw1;
278  tw2 = tw1 + mstride;
279  scratch_tw[1] = *tw2;
280  tw2 += mstride;
281  scratch_tw[2] = *tw2;
282  scratch_in[0] = * Fin1;
283  Fin2 = Fin1 + N;
284  scratch_in[1] = * Fin2;
285  Fin2 += N;
286  scratch_in[2] = * Fin2;
287  Fin2 += N;
288  scratch_in[3] = * Fin2;
289 
290  // radix 4 butterfly with twiddles
291 
292  scratch[0] = scratch_in[0];
293  scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
294  scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
295 
296  scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
297  scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
298 
299  scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
300  scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
301 
302  // radix 2 butterfly
303  scratch[4].r = scratch[0].r + scratch[2].r;
304  scratch[4].i = scratch[0].i + scratch[2].i;
305 
306  scratch[5].r = scratch[0].r - scratch[2].r;
307  scratch[5].i = scratch[0].i - scratch[2].i;
308 
309  // radix 2 butterfly
310  scratch[6].r = scratch[1].r + scratch[3].r;
311  scratch[6].i = scratch[1].i + scratch[3].i;
312 
313  scratch[7].r = scratch[1].r - scratch[3].r;
314  scratch[7].i = scratch[1].i - scratch[3].i;
315 
316  // third result
317  scratch_out[2].r = scratch[4].r - scratch[6].r;
318  scratch_out[2].i = scratch[4].i - scratch[6].i;
319 
320  // first result
321  scratch_out[0].r = scratch[4].r + scratch[6].r;
322  scratch_out[0].i = scratch[4].i + scratch[6].i;
323 
324  // second result
325  scratch_out[1].r = scratch[5].r + scratch[7].i;
326  scratch_out[1].i = scratch[5].i - scratch[7].r;
327 
328  // forth result
329  scratch_out[3].r = scratch[5].r - scratch[7].i;
330  scratch_out[3].i = scratch[5].i + scratch[7].r;
331 
332  // store
333  *Fout1 = scratch_out[0];
334  Fout2 = Fout1 + mstride;
335  *Fout2 = scratch_out[1];
336  Fout2 += mstride;
337  *Fout2 = scratch_out[2];
338  Fout2 += mstride;
339  *Fout2 = scratch_out[3];
340 
341  tw1++;
342  Fin1 ++;
343  Fout1 ++;
344  } // m_count
345  } // f_count
346  tw += mstride * 3;
347  mstride <<= 2;
348  fstride >>= 2;
349 
350  // swap
351  Ftmp = buffer;
352  buffer = Fout;
353  Fout = Ftmp;
354  } // stage_count
355 
356  // the last one
357  if (stage_count)
358  {
359  Fin1 = buffer;
360  // if stage count is even, output to the input array
361  Fout1 = Fout_ls;
362 
363  for (f_count = 0; f_count < fstride; f_count ++)
364  {
365  tw1 = tw;
366  for (m_count = mstride; m_count ; m_count --)
367  {
368  // load
369  scratch_tw[0] = *tw1;
370  tw2 = tw1 + mstride;
371  scratch_tw[1] = *tw2;
372  tw2 += mstride;
373  scratch_tw[2] = *tw2;
374  scratch_in[0] = * Fin1;
375  Fin2 = Fin1 + N;
376  scratch_in[1] = * Fin2;
377  Fin2 += N;
378  scratch_in[2] = * Fin2;
379  Fin2 += N;
380  scratch_in[3] = * Fin2;
381 
382  // radix 4 butterfly with twiddles
383 
384  scratch[0] = scratch_in[0];
385  scratch[1].r = scratch_in[1].r * scratch_tw[0].r - scratch_in[1].i * scratch_tw[0].i;
386  scratch[1].i = scratch_in[1].i * scratch_tw[0].r + scratch_in[1].r * scratch_tw[0].i;
387 
388  scratch[2].r = scratch_in[2].r * scratch_tw[1].r - scratch_in[2].i * scratch_tw[1].i;
389  scratch[2].i = scratch_in[2].i * scratch_tw[1].r + scratch_in[2].r * scratch_tw[1].i;
390 
391  scratch[3].r = scratch_in[3].r * scratch_tw[2].r - scratch_in[3].i * scratch_tw[2].i;
392  scratch[3].i = scratch_in[3].i * scratch_tw[2].r + scratch_in[3].r * scratch_tw[2].i;
393 
394  // radix 2 butterfly
395  scratch[4].r = scratch[0].r + scratch[2].r;
396  scratch[4].i = scratch[0].i + scratch[2].i;
397 
398  scratch[5].r = scratch[0].r - scratch[2].r;
399  scratch[5].i = scratch[0].i - scratch[2].i;
400 
401  // radix 2 butterfly
402  scratch[6].r = scratch[1].r + scratch[3].r;
403  scratch[6].i = scratch[1].i + scratch[3].i;
404 
405  scratch[7].r = scratch[1].r - scratch[3].r;
406  scratch[7].i = scratch[1].i - scratch[3].i;
407 
408  // third result
409  scratch_out[2].r = scratch[4].r - scratch[6].r;
410  scratch_out[2].i = scratch[4].i - scratch[6].i;
411 
412  // first result
413  scratch_out[0].r = scratch[4].r + scratch[6].r;
414  scratch_out[0].i = scratch[4].i + scratch[6].i;
415 
416  // second result
417  scratch_out[1].r = scratch[5].r + scratch[7].i;
418  scratch_out[1].i = scratch[5].i - scratch[7].r;
419 
420  // forth result
421  scratch_out[3].r = scratch[5].r - scratch[7].i;
422  scratch_out[3].i = scratch[5].i + scratch[7].r;
423 
424  // store
425  *Fout1 = scratch_out[0];
426  Fout2 = Fout1 + N;
427  *Fout2 = scratch_out[1];
428  Fout2 += N;
429  *Fout2 = scratch_out[2];
430  Fout2 += N;
431  *Fout2 = scratch_out[3];
432 
433  tw1 ++;
434  Fin1 ++;
435  Fout1 ++;
436  } // m_count
437  } // f_count
438  } // last stage
439 }
440 
441 static void ne10_mixed_radix_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t * Fout,
443  ne10_int32_t * factors,
444  ne10_fft_cpx_float32_t * twiddles,
445  ne10_fft_cpx_float32_t * buffer)
446 {
447  ne10_int32_t fstride, mstride, N;
448  ne10_int32_t fstride1;
449  ne10_int32_t f_count, m_count;
450  ne10_int32_t stage_count;
451  ne10_float32_t one_by_nfft;
452 
453  ne10_fft_cpx_float32_t scratch_in[8];
454  ne10_fft_cpx_float32_t scratch_out[8];
455  ne10_fft_cpx_float32_t scratch[16];
456  ne10_fft_cpx_float32_t scratch_tw[6];
457 
458  ne10_fft_cpx_float32_t *Fin1, *Fin2, *Fout1, *Fout2;
459  ne10_fft_cpx_float32_t *Fout_ls = Fout;
461  ne10_fft_cpx_float32_t *tw, *tw1, *tw2;
462  const ne10_float32_t TW_81 = 0.70710678;
463  const ne10_float32_t TW_81N = -0.70710678;
464 
465  // init fstride, mstride, N, one_by_nfft, tw
466  stage_count = factors[0];
467  fstride = factors[1];
468  mstride = factors[ (stage_count << 1) - 1 ];
469  N = factors[ stage_count << 1 ]; // radix
470  one_by_nfft = (1.0f / (ne10_float32_t) (fstride * N));
471  tw = twiddles;
472 
473  // the first stage
474  Fin1 = Fin;
475  Fout1 = Fout;
476  if (N == 2) // length of FFT is 2^n (n is odd)
477  {
478  // radix 8
479  N = fstride >> 1; // 1/4 of length of FFT
480  fstride1 = fstride >> 2;
481 
482  Fin1 = Fin;
483  for (f_count = 0; f_count < fstride1; f_count ++)
484  {
485  Fout1 = & Fout[ f_count * 8 ];
486 
487  scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
488  scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
489  scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
490  scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
491  scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
492  scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
493  scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
494  scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
495  scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
496  scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
497  scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
498  scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
499  scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
500  scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
501  scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
502  scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
503 
504  // radix 4 butterfly with twiddles
505 
506  scratch[0] = scratch_in[0];
507  scratch[1] = scratch_in[1];
508 
509  scratch[2] = scratch_in[2];
510  scratch[3].r = (scratch_in[3].r - scratch_in[3].i) * TW_81;
511  scratch[3].i = (scratch_in[3].i + scratch_in[3].r) * TW_81;
512 
513  scratch[4] = scratch_in[4];
514  scratch[5].r = -scratch_in[5].i;
515  scratch[5].i = scratch_in[5].r;
516 
517  scratch[6].r = scratch_in[6].r;
518  scratch[6].i = scratch_in[6].i;
519  scratch[7].r = (scratch_in[7].r + scratch_in[7].i) * TW_81N;
520  scratch[7].i = (scratch_in[7].i - scratch_in[7].r) * TW_81N;
521 
522  // radix 2 butterfly
523  scratch[8].r = scratch[0].r + scratch[4].r;
524  scratch[8].i = scratch[0].i + scratch[4].i;
525  scratch[9].r = scratch[1].r + scratch[5].r;
526  scratch[9].i = scratch[1].i + scratch[5].i;
527 
528  scratch[10].r = scratch[0].r - scratch[4].r;
529  scratch[10].i = scratch[0].i - scratch[4].i;
530  scratch[11].r = scratch[1].r - scratch[5].r;
531  scratch[11].i = scratch[1].i - scratch[5].i;
532 
533  // radix 2 butterfly
534  scratch[12].r = scratch[2].r + scratch[6].r;
535  scratch[12].i = scratch[2].i + scratch[6].i;
536  scratch[13].r = scratch[3].r + scratch[7].r;
537  scratch[13].i = scratch[3].i + scratch[7].i;
538 
539  scratch[14].r = scratch[2].r - scratch[6].r;
540  scratch[14].i = scratch[2].i - scratch[6].i;
541  scratch[15].r = scratch[3].r - scratch[7].r;
542  scratch[15].i = scratch[3].i - scratch[7].i;
543 
544  // third result
545  scratch_out[4].r = scratch[8].r - scratch[12].r;
546  scratch_out[4].i = scratch[8].i - scratch[12].i;
547  scratch_out[5].r = scratch[9].r - scratch[13].r;
548  scratch_out[5].i = scratch[9].i - scratch[13].i;
549 
550  // first result
551  scratch_out[0].r = scratch[8].r + scratch[12].r;
552  scratch_out[0].i = scratch[8].i + scratch[12].i;
553  scratch_out[1].r = scratch[9].r + scratch[13].r;
554  scratch_out[1].i = scratch[9].i + scratch[13].i;
555 
556  // second result
557  scratch_out[2].r = scratch[10].r - scratch[14].i;
558  scratch_out[2].i = scratch[10].i + scratch[14].r;
559  scratch_out[3].r = scratch[11].r - scratch[15].i;
560  scratch_out[3].i = scratch[11].i + scratch[15].r;
561 
562  // forth result
563  scratch_out[6].r = scratch[10].r + scratch[14].i;
564  scratch_out[6].i = scratch[10].i - scratch[14].r;
565  scratch_out[7].r = scratch[11].r + scratch[15].i;
566  scratch_out[7].i = scratch[11].i - scratch[15].r;
567 
568  // store
569  Fout1[0] = scratch_out[0];
570  Fout1[1] = scratch_out[1];
571  Fout1[2] = scratch_out[2];
572  Fout1[3] = scratch_out[3];
573  Fout1[4] = scratch_out[4];
574  Fout1[5] = scratch_out[5];
575  Fout1[6] = scratch_out[6];
576  Fout1[7] = scratch_out[7];
577 
578  Fin1 += 1;
579  } // f_count
580  tw += 6;
581  mstride <<= 2;
582  fstride >>= 4;
583  stage_count -= 2;
584 
585  if (stage_count == 0)
586  {
587  for (f_count = 0; f_count < 8; f_count++)
588  {
589  Fout[f_count].r *= one_by_nfft;
590  Fout[f_count].i *= one_by_nfft;
591  }
592  }
593 
594  // swap
595  Ftmp = buffer;
596  buffer = Fout;
597  Fout = Ftmp;
598  }
599  else if (N == 4) // length of FFT is 2^n (n is even)
600  {
601  //fstride is nfft>>2
602  for (f_count = fstride; f_count ; f_count --)
603  {
604  // load
605  scratch_in[0] = *Fin1;
606  Fin2 = Fin1 + fstride;
607  scratch_in[1] = *Fin2;
608  Fin2 = Fin2 + fstride;
609  scratch_in[2] = *Fin2;
610  Fin2 = Fin2 + fstride;
611  scratch_in[3] = *Fin2;
612 
613  // radix 4 butterfly without twiddles
614 
615  // radix 2 butterfly
616  scratch[0].r = scratch_in[0].r + scratch_in[2].r;
617  scratch[0].i = scratch_in[0].i + scratch_in[2].i;
618 
619  scratch[1].r = scratch_in[0].r - scratch_in[2].r;
620  scratch[1].i = scratch_in[0].i - scratch_in[2].i;
621 
622  // radix 2 butterfly
623  scratch[2].r = scratch_in[1].r + scratch_in[3].r;
624  scratch[2].i = scratch_in[1].i + scratch_in[3].i;
625 
626  scratch[3].r = scratch_in[1].r - scratch_in[3].r;
627  scratch[3].i = scratch_in[1].i - scratch_in[3].i;
628 
629  // third result
630  scratch_out[2].r = scratch[0].r - scratch[2].r;
631  scratch_out[2].i = scratch[0].i - scratch[2].i;
632 
633  // first result
634  scratch_out[0].r = scratch[0].r + scratch[2].r;
635  scratch_out[0].i = scratch[0].i + scratch[2].i;
636 
637  // second result
638  scratch_out[1].r = scratch[1].r - scratch[3].i;
639  scratch_out[1].i = scratch[1].i + scratch[3].r;
640 
641  // forth result
642  scratch_out[3].r = scratch[1].r + scratch[3].i;
643  scratch_out[3].i = scratch[1].i - scratch[3].r;
644 
645  // store
646  * Fout1 ++ = scratch_out[0];
647  * Fout1 ++ = scratch_out[1];
648  * Fout1 ++ = scratch_out[2];
649  * Fout1 ++ = scratch_out[3];
650 
651  Fin1++;
652  } // f_count
653 
654  N = fstride; // 1/4 of length of FFT
655 
656  // update address for other stages
657  stage_count--;
658  fstride >>= 2;
659 
660  if (stage_count == 0)
661  {
662  for (f_count = 0; f_count < 4; f_count++)
663  {
664  Fout[f_count].r *= one_by_nfft;
665  Fout[f_count].i *= one_by_nfft;
666  }
667  }
668  // swap
669  Ftmp = buffer;
670  buffer = Fout;
671  Fout = Ftmp;
672  // end of first stage
673  }
674 
675 
676  // others but the last one
677  for (; stage_count > 1 ; stage_count--)
678  {
679  Fin1 = buffer;
680  for (f_count = 0; f_count < fstride; f_count ++)
681  {
682  Fout1 = & Fout[ f_count * mstride << 2 ];
683  tw1 = tw;
684  for (m_count = mstride; m_count ; m_count --)
685  {
686  // load
687  scratch_tw[0] = *tw1;
688  tw2 = tw1 + mstride;
689  scratch_tw[1] = *tw2;
690  tw2 += mstride;
691  scratch_tw[2] = *tw2;
692  scratch_in[0] = * Fin1;
693  Fin2 = Fin1 + N;
694  scratch_in[1] = * Fin2;
695  Fin2 += N;
696  scratch_in[2] = * Fin2;
697  Fin2 += N;
698  scratch_in[3] = * Fin2;
699 
700  // radix 4 butterfly with twiddles
701 
702  scratch[0] = scratch_in[0];
703  scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
704  scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
705 
706  scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
707  scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
708 
709  scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
710  scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
711 
712  // radix 2 butterfly
713  scratch[4].r = scratch[0].r + scratch[2].r;
714  scratch[4].i = scratch[0].i + scratch[2].i;
715 
716  scratch[5].r = scratch[0].r - scratch[2].r;
717  scratch[5].i = scratch[0].i - scratch[2].i;
718 
719  // radix 2 butterfly
720  scratch[6].r = scratch[1].r + scratch[3].r;
721  scratch[6].i = scratch[1].i + scratch[3].i;
722 
723  scratch[7].r = scratch[1].r - scratch[3].r;
724  scratch[7].i = scratch[1].i - scratch[3].i;
725 
726  // third result
727  scratch_out[2].r = scratch[4].r - scratch[6].r;
728  scratch_out[2].i = scratch[4].i - scratch[6].i;
729 
730  // first result
731  scratch_out[0].r = scratch[4].r + scratch[6].r;
732  scratch_out[0].i = scratch[4].i + scratch[6].i;
733 
734  // second result
735  scratch_out[1].r = scratch[5].r - scratch[7].i;
736  scratch_out[1].i = scratch[5].i + scratch[7].r;
737 
738  // forth result
739  scratch_out[3].r = scratch[5].r + scratch[7].i;
740  scratch_out[3].i = scratch[5].i - scratch[7].r;
741 
742  // store
743  *Fout1 = scratch_out[0];
744  Fout2 = Fout1 + mstride;
745  *Fout2 = scratch_out[1];
746  Fout2 += mstride;
747  *Fout2 = scratch_out[2];
748  Fout2 += mstride;
749  *Fout2 = scratch_out[3];
750 
751  tw1++;
752  Fin1 ++;
753  Fout1 ++;
754  } // m_count
755  } // f_count
756  tw += mstride * 3;
757  mstride <<= 2;
758  fstride >>= 2;
759 
760  // swap
761  Ftmp = buffer;
762  buffer = Fout;
763  Fout = Ftmp;
764  } // stage_count
765 
766  // the last one
767  if (stage_count)
768  {
769  Fin1 = buffer;
770  // if stage count is even, output to the input array
771  Fout1 = Fout_ls;
772 
773  for (f_count = 0; f_count < fstride; f_count ++)
774  {
775  tw1 = tw;
776  for (m_count = mstride; m_count ; m_count --)
777  {
778  // load
779  scratch_tw[0] = *tw1;
780  tw2 = tw1 + mstride;
781  scratch_tw[1] = *tw2;
782  tw2 += mstride;
783  scratch_tw[2] = *tw2;
784  scratch_in[0] = * Fin1;
785  Fin2 = Fin1 + N;
786  scratch_in[1] = * Fin2;
787  Fin2 += N;
788  scratch_in[2] = * Fin2;
789  Fin2 += N;
790  scratch_in[3] = * Fin2;
791 
792  // radix 4 butterfly with twiddles
793 
794  scratch[0] = scratch_in[0];
795  scratch[1].r = scratch_in[1].r * scratch_tw[0].r + scratch_in[1].i * scratch_tw[0].i;
796  scratch[1].i = scratch_in[1].i * scratch_tw[0].r - scratch_in[1].r * scratch_tw[0].i;
797 
798  scratch[2].r = scratch_in[2].r * scratch_tw[1].r + scratch_in[2].i * scratch_tw[1].i;
799  scratch[2].i = scratch_in[2].i * scratch_tw[1].r - scratch_in[2].r * scratch_tw[1].i;
800 
801  scratch[3].r = scratch_in[3].r * scratch_tw[2].r + scratch_in[3].i * scratch_tw[2].i;
802  scratch[3].i = scratch_in[3].i * scratch_tw[2].r - scratch_in[3].r * scratch_tw[2].i;
803 
804  // radix 2 butterfly
805  scratch[4].r = scratch[0].r + scratch[2].r;
806  scratch[4].i = scratch[0].i + scratch[2].i;
807 
808  scratch[5].r = scratch[0].r - scratch[2].r;
809  scratch[5].i = scratch[0].i - scratch[2].i;
810 
811  // radix 2 butterfly
812  scratch[6].r = scratch[1].r + scratch[3].r;
813  scratch[6].i = scratch[1].i + scratch[3].i;
814 
815  scratch[7].r = scratch[1].r - scratch[3].r;
816  scratch[7].i = scratch[1].i - scratch[3].i;
817 
818  // third result
819  scratch_out[2].r = (scratch[4].r - scratch[6].r) * one_by_nfft;
820  scratch_out[2].i = (scratch[4].i - scratch[6].i) * one_by_nfft;
821 
822  // first result
823  scratch_out[0].r = (scratch[4].r + scratch[6].r) * one_by_nfft;
824  scratch_out[0].i = (scratch[4].i + scratch[6].i) * one_by_nfft;
825 
826  // second result
827  scratch_out[1].r = (scratch[5].r - scratch[7].i) * one_by_nfft;
828  scratch_out[1].i = (scratch[5].i + scratch[7].r) * one_by_nfft;
829 
830  // forth result
831  scratch_out[3].r = (scratch[5].r + scratch[7].i) * one_by_nfft;
832  scratch_out[3].i = (scratch[5].i - scratch[7].r) * one_by_nfft;
833 
834  // store
835  *Fout1 = scratch_out[0];
836  Fout2 = Fout1 + N;
837  *Fout2 = scratch_out[1];
838  Fout2 += N;
839  *Fout2 = scratch_out[2];
840  Fout2 += N;
841  *Fout2 = scratch_out[3];
842 
843  tw1 ++;
844  Fin1 ++;
845  Fout1 ++;
846  } // m_count
847  } // f_count
848  } // last stage
849 }
850 
851 static void ne10_fft_split_r2c_1d_float32 (ne10_fft_cpx_float32_t *dst,
852  const ne10_fft_cpx_float32_t *src,
853  ne10_fft_cpx_float32_t *twiddles,
854  ne10_int32_t ncfft)
855 {
856  ne10_int32_t k;
857  ne10_fft_cpx_float32_t fpnk, fpk, f1k, f2k, tw, tdc;
858 
859  tdc.r = src[0].r;
860  tdc.i = src[0].i;
861 
862  dst[0].r = tdc.r + tdc.i;
863  dst[ncfft].r = tdc.r - tdc.i;
864  dst[ncfft].i = dst[0].i = 0;
865 
866  for (k = 1; k <= ncfft / 2 ; ++k)
867  {
868  fpk = src[k];
869  fpnk.r = src[ncfft - k].r;
870  fpnk.i = - src[ncfft - k].i;
871 
872  f1k.r = fpk.r + fpnk.r;
873  f1k.i = fpk.i + fpnk.i;
874 
875  f2k.r = fpk.r - fpnk.r;
876  f2k.i = fpk.i - fpnk.i;
877 
878  tw.r = f2k.r * (twiddles[k - 1]).r - f2k.i * (twiddles[k - 1]).i;
879  tw.i = f2k.r * (twiddles[k - 1]).i + f2k.i * (twiddles[k - 1]).r;
880 
881  dst[k].r = (f1k.r + tw.r) * 0.5f;
882  dst[k].i = (f1k.i + tw.i) * 0.5f;
883  dst[ncfft - k].r = (f1k.r - tw.r) * 0.5f;
884  dst[ncfft - k].i = (tw.i - f1k.i) * 0.5f;
885  }
886 }
887 
888 static void ne10_fft_split_c2r_1d_float32 (ne10_fft_cpx_float32_t *dst,
889  const ne10_fft_cpx_float32_t *src,
890  ne10_fft_cpx_float32_t *twiddles,
891  ne10_int32_t ncfft)
892 {
893 
894  ne10_int32_t k;
895  ne10_fft_cpx_float32_t fk, fnkc, fek, fok, tmp;
896 
897 
898  dst[0].r = (src[0].r + src[ncfft].r) * 0.5f;
899  dst[0].i = (src[0].r - src[ncfft].r) * 0.5f;
900 
901  for (k = 1; k <= ncfft / 2; k++)
902  {
903  fk = src[k];
904  fnkc.r = src[ncfft - k].r;
905  fnkc.i = -src[ncfft - k].i;
906 
907  fek.r = fk.r + fnkc.r;
908  fek.i = fk.i + fnkc.i;
909 
910  tmp.r = fk.r - fnkc.r;
911  tmp.i = fk.i - fnkc.i;
912 
913  fok.r = tmp.r * twiddles[k - 1].r + tmp.i * twiddles[k - 1].i;
914  fok.i = tmp.i * twiddles[k - 1].r - tmp.r * twiddles[k - 1].i;
915 
916  dst[k].r = (fek.r + fok.r) * 0.5f;
917  dst[k].i = (fek.i + fok.i) * 0.5f;
918 
919  dst[ncfft - k].r = (fek.r - fok.r) * 0.5f;
920  dst[ncfft - k].i = (fok.i - fek.i) * 0.5f;
921  }
922 }
923 
998 {
999  ne10_fft_cfg_float32_t st = NULL;
1000  ne10_uint32_t memneeded = sizeof (ne10_fft_state_float32_t)
1001  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors*/
1002  + sizeof (ne10_fft_cpx_float32_t) * nfft /* twiddle*/
1003  + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer*/
1004  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1005 
1006  st = (ne10_fft_cfg_float32_t) NE10_MALLOC (memneeded);
1007 
1008  // Only backward FFT is scaled by default.
1009  st->is_forward_scaled = 0;
1010  st->is_backward_scaled = 1;
1011 
1012  if (st == NULL)
1013  {
1014  return st;
1015  }
1016 
1017  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_float32_t);
1018  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1019  st->factors = (ne10_int32_t*) address;
1020  st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
1021  st->buffer = st->twiddles + nfft;
1022  st->nfft = nfft;
1023 
1024  ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
1025  if (result == NE10_ERR)
1026  {
1027  NE10_FREE (st);
1028  return st;
1029  }
1030 
1031  // Check if ALGORITHM FLAG is NE10_FFT_ALG_ANY.
1032  {
1033  ne10_int32_t stage_count = st->factors[0];
1034  ne10_int32_t algorithm_flag = st->factors[2 * (stage_count + 1)];
1035 
1036  // Enable radix-8.
1037  if (algorithm_flag == NE10_FFT_ALG_ANY)
1038  {
1039  result = ne10_factor (st->nfft, st->factors, NE10_FACTOR_EIGHT);
1040  if (result == NE10_ERR)
1041  {
1042  PRINT_HIT;
1043  NE10_FREE (st);
1044  return st;
1045  }
1046  }
1047  }
1048 
1049  ne10_fft_generate_twiddles_float32 (st->twiddles, st->factors, nfft);
1050 
1051  return st;
1052 }
1053 
1068  ne10_int32_t inverse_fft)
1069 {
1070  ne10_int32_t stage_count = cfg->factors[0];
1071  ne10_int32_t algorithm_flag = cfg->factors[2 * (stage_count + 1)];
1072 
1073  assert ((algorithm_flag == NE10_FFT_ALG_24)
1074  || (algorithm_flag == NE10_FFT_ALG_ANY));
1075 
1076  switch (algorithm_flag)
1077  {
1078  case NE10_FFT_ALG_24:
1079  if (inverse_fft)
1080  {
1081  ne10_mixed_radix_butterfly_inverse_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1082  }
1083  else
1084  {
1085  ne10_mixed_radix_butterfly_float32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer);
1086  }
1087  break;
1088  case NE10_FFT_ALG_ANY:
1089  if (inverse_fft)
1090  {
1091  ne10_mixed_radix_generic_butterfly_inverse_float32_c (fout, fin,
1092  cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_backward_scaled);
1093  }
1094  else
1095  {
1096  ne10_mixed_radix_generic_butterfly_float32_c (fout, fin,
1097  cfg->factors, cfg->twiddles, cfg->buffer, cfg->is_forward_scaled);
1098  }
1099  break;
1100  }
1101 }
1102  //end of C2C_FFT_IFFT group
1106 
1184 // For NE10_UNROLL_LEVEL > 0, please refer to NE10_rfft_float.c
1185 #if (NE10_UNROLL_LEVEL == 0)
1186 
1194 {
1195  ne10_fft_r2c_cfg_float32_t st = NULL;
1196  ne10_int32_t ncfft = nfft >> 1;
1197 
1198  ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_float32_t)
1199  + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
1200  + sizeof (ne10_fft_cpx_float32_t) * ncfft /* twiddle*/
1201  + sizeof (ne10_fft_cpx_float32_t) * (ncfft / 2) /* super twiddles*/
1202  + sizeof (ne10_fft_cpx_float32_t) * nfft /* buffer*/
1203  + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
1204 
1205  st = (ne10_fft_r2c_cfg_float32_t) NE10_MALLOC (memneeded);
1206 
1207  if (st)
1208  {
1209  uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_float32_t);
1210  NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
1211  st->factors = (ne10_int32_t*) address;
1212  st->twiddles = (ne10_fft_cpx_float32_t*) (st->factors + (NE10_MAXFACTORS * 2));
1213  st->super_twiddles = st->twiddles + ncfft;
1214  st->buffer = st->super_twiddles + (ncfft / 2);
1215  st->ncfft = ncfft;
1216 
1217  ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
1218  if (result == NE10_ERR)
1219  {
1220  NE10_FREE (st);
1221  return st;
1222  }
1223 
1224  ne10_int32_t i, j;
1225  ne10_int32_t *factors = st->factors;
1226  ne10_fft_cpx_float32_t *twiddles = st->twiddles;
1228  ne10_int32_t stage_count = factors[0];
1229  ne10_int32_t fstride1 = factors[1];
1230  ne10_int32_t fstride2 = fstride1 * 2;
1231  ne10_int32_t fstride3 = fstride1 * 3;
1232  ne10_int32_t m;
1233 
1234  const ne10_float32_t pi = NE10_PI;
1235  ne10_float32_t phase1;
1236  ne10_float32_t phase2;
1237  ne10_float32_t phase3;
1238 
1239  for (i = stage_count - 1; i > 0; i--)
1240  {
1241  fstride1 >>= 2;
1242  fstride2 >>= 2;
1243  fstride3 >>= 2;
1244  m = factors[2 * i + 1];
1245  tw = twiddles;
1246  for (j = 0; j < m; j++)
1247  {
1248  phase1 = -2 * pi * fstride1 * j / ncfft;
1249  phase2 = -2 * pi * fstride2 * j / ncfft;
1250  phase3 = -2 * pi * fstride3 * j / ncfft;
1251  tw->r = (ne10_float32_t) cos (phase1);
1252  tw->i = (ne10_float32_t) sin (phase1);
1253  (tw + m)->r = (ne10_float32_t) cos (phase2);
1254  (tw + m)->i = (ne10_float32_t) sin (phase2);
1255  (tw + m * 2)->r = (ne10_float32_t) cos (phase3);
1256  (tw + m * 2)->i = (ne10_float32_t) sin (phase3);
1257  tw++;
1258  }
1259  twiddles += m * 3;
1260  }
1261 
1262  tw = st->super_twiddles;
1263  for (i = 0; i < ncfft / 2; i++)
1264  {
1265  phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
1266  tw->r = (ne10_float32_t) cos (phase1);
1267  tw->i = (ne10_float32_t) sin (phase1);
1268  tw++;
1269  }
1270 
1271  }
1272  return st;
1273 }
1274 
1286  ne10_float32_t *fin,
1288 {
1289  ne10_fft_cpx_float32_t * tmpbuf = cfg->buffer;
1290 
1291  ne10_mixed_radix_butterfly_float32_c (tmpbuf, (ne10_fft_cpx_float32_t*) fin, cfg->factors, cfg->twiddles, fout);
1292  ne10_fft_split_r2c_1d_float32 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft);
1293 }
1294 
1305 void ne10_fft_c2r_1d_float32_c (ne10_float32_t *fout,
1308 {
1309  ne10_fft_cpx_float32_t * tmpbuf1 = cfg->buffer;
1310  ne10_fft_cpx_float32_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
1311 
1312  ne10_fft_split_c2r_1d_float32 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft);
1313  ne10_mixed_radix_butterfly_inverse_float32_c ( (ne10_fft_cpx_float32_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2);
1314 }
1315 
1319 #endif // NE10_UNROLL_LEVEL
ne10_fft_state_float32_t
structure for the floating point FFT state
Definition: NE10_types.h:240
ne10_fft_c2r_1d_float32_c
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 IFFT (complex to real) of float(32-bit) data.
Definition: NE10_fft_float32.c:1305
ne10_fft_alloc_r2c_float32
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
Definition: NE10_fft_float32.c:1193
ne10_fft_cpx_float32_t
Definition: NE10_types.h:230
ne10_fft_alloc_c2c_float32_c
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
Definition: NE10_fft_float32.c:997
ne10_fft_c2c_1d_float32_c
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Mixed radix-2/3/4/5 complex FFT/IFFT of float(32-bit) data.
Definition: NE10_fft_float32.c:1065
ne10_fft_r2c_state_float32_t
Definition: NE10_types.h:272
ne10_fft_state_float32_t::is_forward_scaled
ne10_int32_t is_forward_scaled
@brief Flag to control scaling behaviour in forward floating point complex FFT.
Definition: NE10_types.h:255
ne10_fft_r2c_1d_float32_c
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Mixed radix-2/4 FFT (real to complex) of float(32-bit) data.
Definition: NE10_fft_float32.c:1285
ne10_fft_state_float32_t::is_backward_scaled
ne10_int32_t is_backward_scaled
@brief Flag to control scaling behaviour in backward floating point complex FFT.
Definition: NE10_types.h:264