Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_iir.c
/*
 *  Copyright 2012-15 ARM Limited and Contributors.
 *  All rights reserved.
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of ARM Limited nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 *  THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
 *  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 *  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *  DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
 *  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 *  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * NE10 Library : test_suite_iir.c
 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35
36#include "NE10_dsp.h"
37#include "seatest.h"
38
39
/* ----------------------------------------------------------------------
** Global defines
** ------------------------------------------------------------------- */

/* Number of samples in testInput_f32 and in every input/output buffer */
#define TEST_LENGTH_SAMPLES 320
/* MAX_NUMTAPS + MAX_BLOCKSIZE is the element count of each filter state buffer */
#define MAX_BLOCKSIZE 320
#define MAX_NUMTAPS 100

/* Number of times each configuration is repeated inside the timed region */
#define TEST_COUNT 5000
50
51/* ----------------------------------------------------------------------
52** Coefficients of 1-tap filter for F32
53** ------------------------------------------------------------------- */
54
55static ne10_float32_t testkCoeffs1[1] =
56{
57 -0.3249
58};
59static ne10_float32_t testvCoeffs1[2] =
60{
61 0.447214, 0.337540
62};
63
64/* ----------------------------------------------------------------------
65** Coefficients of 9-tap filter for F32, Q31, Q15
66** ------------------------------------------------------------------- */
67
68static ne10_float32_t testkCoeffs9[9] =
69{
70 -0.003320, 0.035949, -0.164096, 0.406018, -0.633594, 0.764885, -0.817318, 0.893064,
71 -0.748373
72};
73static ne10_float32_t testvCoeffs9[10] =
74{
75 -0.013805, -0.001180, 0.075167, 0.156646, 0.156373, 0.093161, 0.036815, 0.009947,
76 0.001679, 0.000133
77};
78
79/* ----------------------------------------------------------------------
80** Coefficients of 8-tap filter for F32, Q31, Q15
81** ------------------------------------------------------------------- */
82
83static ne10_float32_t testkCoeffs8[8] =
84{
85 0.006226, -0.059956, 0.238433, -0.507424, 0.708901, -0.798284, 0.881225, -0.754774
86
87};
88static ne10_float32_t testvCoeffs8[9] =
89{
90 -0.018552, 0.019153, 0.124951, 0.186823, 0.143778, 0.067568, 0.020944, 0.004009,
91 0.000358
92};
93
94/* ----------------------------------------------------------------------
95** Coefficients of 10-tap filter for F32
96** ------------------------------------------------------------------- */
97
98static ne10_float32_t testkCoeffs10[10] = { 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316,
99 0.902786, -0.741338
100};
101
102static ne10_float32_t testvCoeffs10[11] =
103{
104 -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182,
105 0.004598, 0.000694, 0.000050
106};
107
108/* ----------------------------------------------------------------------
109** Coefficients of 10-tap filter for F32
110** ------------------------------------------------------------------- */
111
112static ne10_float32_t testkCoeffs33[33] =
113{
114 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
115 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
116 0.001770, -0.021279, 0.109785, -0.312208, 0.551053, -0.711844, 0.797513, -0.828316, 0.902786, -0.741338,
117 0.001770, -0.021279, 0.109785
118};
119
120static ne10_float32_t testvCoeffs33[34] =
121{
122 -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000050,
123 -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
124 -0.008154, -0.009240, 0.037339, 0.117832, 0.151836, 0.113971, 0.055862, 0.019182, 0.004598, 0.000694,
125 -0.008154, -0.009240, 0.037339, 0.117832
126};
127
128/* ----------------------------------------------------------------------
129** Coefficients of 2-tap filter for F32
130** ------------------------------------------------------------------- */
131
132static ne10_float32_t testkCoeffs2[2] = { 0.2722, -0.5878 };
133
134static ne10_float32_t testvCoeffs2[3] =
135{
136 0.3072, 0.3603, 0.1311
137};
138
139/* ----------------------------------------------------------------------
140** Test input data for F32
141** Generated by the MATLAB rand() function
142** ------------------------------------------------------------------- */
143
144static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
145{
146 -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
147 0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
148 1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
149 -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
150 0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
151 -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
152 -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
153 0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
154 0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
155 -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
156 0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
157 1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
158 0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
159 0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
160 1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
161 0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
162 0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
163 1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
164 -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
165 0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
166 0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
167 0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
168 0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
169 -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
170 1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
171 0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
172 -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
173 0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
174 1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
175 -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
176 0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
177 0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
178 -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
179 -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
180 -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
181 1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
182 -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
183 0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
184 1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
185 -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934
186
187};
188
189
190/* ----------------------------------------------------------------------
191** Defines each of the tests performed
192** ------------------------------------------------------------------- */
193typedef struct
194{
195 ne10_uint32_t blockSize;
196 ne10_uint32_t numTaps;
197 ne10_uint32_t numFrames;
198 ne10_float32_t *kCoeffsF32;
199 ne10_float32_t *vCoeffsF32;
200 ne10_float32_t *inputF32;
202
/* All Test configurations, 100% Code Coverage */
/*
 * Entry layout: {blockSize, numTaps, numFrames, kCoeffs, vCoeffs, input}.
 * blockSize * numFrames never exceeds TEST_LENGTH_SAMPLES (320), so every
 * frame stays inside the input and output buffers.
 * Both operands use defined() so the guard is also valid when
 * REGRESSION_TEST is defined with an empty value.
 */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
static test_config CONFIG[] =
{
    {32, 1, 10, &testkCoeffs1[0], &testvCoeffs1[0], &testInput_f32[0]},
    {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
    {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
    {32, 10, 10, &testkCoeffs10[0], &testvCoeffs10[0], &testInput_f32[0]},
    {5, 2, 64, &testkCoeffs2[0], &testvCoeffs2[0], &testInput_f32[0]},
    {0, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
    {0, 0, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
    {32, 8, 10, &testkCoeffs8[0], &testvCoeffs8[0], &testInput_f32[0]},
    {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#endif
/* Configurations timed by the performance test; same entry layout as the
 * test_config struct: {blockSize, numTaps, numFrames, kCoeffs, vCoeffs, input}. */
#ifdef PERFORMANCE_TEST
static test_config CONFIG_PERF[] =
{
    {2, 9, 160, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
    {32, 9, 10, &testkCoeffs9[0], &testvCoeffs9[0], &testInput_f32[0]},
    {32, 33, 10, &testkCoeffs33[0], &testvCoeffs33[0], &testInput_f32[0]}
};
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
#endif
226
227//input and output
228static ne10_float32_t * guarded_in_c = NULL;
229static ne10_float32_t * guarded_in_neon = NULL;
230static ne10_float32_t * in_c = NULL;
231static ne10_float32_t * in_neon = NULL;
232
233static ne10_float32_t * guarded_out_c = NULL;
234static ne10_float32_t * guarded_out_neon = NULL;
235static ne10_float32_t * out_c = NULL;
236static ne10_float32_t * out_neon = NULL;
237
238static ne10_float32_t * guarded_iir_state_c = NULL;
239static ne10_float32_t * guarded_iir_state_neon = NULL;
240static ne10_float32_t * iir_state_c = NULL;
241static ne10_float32_t * iir_state_neon = NULL;
242
243#if defined (SMOKE_TEST)||(REGRESSION_TEST)
244static ne10_float32_t snr = 0.0f;
245#endif
246#ifdef PERFORMANCE_TEST
247static ne10_int64_t time_c = 0;
248static ne10_int64_t time_neon = 0;
249static ne10_float32_t time_speedup = 0.0f;
250static ne10_float32_t time_savings = 0.0f;
251#endif
252
253void test_iir_lattice_case0()
254{
256
257 ne10_uint16_t loop = 0;
258 ne10_uint16_t block = 0;
259 ne10_uint16_t i = 0;
260
261 test_config *config;
262
263 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
264
265 /* init input memory */
266 NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
267 NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
268
269 /* init dst memory */
270 NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
271 NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
272
273 /* init state memory */
274 NE10_DST_ALLOC (iir_state_c, guarded_iir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
275 NE10_DST_ALLOC (iir_state_neon, guarded_iir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
276
277#ifdef ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
278#if defined (SMOKE_TEST)||(REGRESSION_TEST)
279 ne10_uint16_t pos = 0;
280 for (loop = 0; loop < NUM_TESTS; loop++)
281 {
282 config = &CONFIG[loop];
283
284 /* Initialize the CFFT/CIFFT module */
285 ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
286 ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
287
288 /* copy input to input buffer */
289 for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
290 {
291 in_c[i] = testInput_f32[i];
292 in_neon[i] = testInput_f32[i];
293 out_c[i] = 0;
294 out_neon[i] = 0;
295 }
296
297 GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
298 GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
299
300 for (block = 0; block < config->numFrames; block++)
301 {
302 ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
303 }
304 for (block = 0; block < config->numFrames; block++)
305 {
306 ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
307 }
308
309 CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES);
310 CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES);
311
312 //conformance test 1: compare snr
313 snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
314#if defined (DEBUG_TRACE)
315 printf ("--------------------config %d\n", loop);
316 printf ("snr %f\n", snr);
317#endif
318 assert_false ( (snr < SNR_THRESHOLD));
319
320 //conformance test 2: compare output of C and neon
321 for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
322 {
323#if defined (DEBUG_TRACE)
324 printf ("pos %d \n", pos);
325 printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
326#endif
327 assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_LARGE, 1);
328 }
329
330 }
331#endif
332#endif // ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
333
334#ifdef PERFORMANCE_TEST
335 ne10_uint16_t k;
336 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "IIR Length&Taps", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
337 for (loop = 0; loop < NUM_PERF_TESTS; loop++)
338 {
339 config = &CONFIG_PERF[loop];
340
341 /* Initialize the CFFT/CIFFT module */
342 ne10_iir_lattice_init_float (&SC, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_c, config->blockSize);
343 ne10_iir_lattice_init_float (&SN, config->numTaps, config->kCoeffsF32, config->vCoeffsF32, iir_state_neon, config->blockSize);
344
345 /* copy input to input buffer */
346 for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
347 {
348 in_c[i] = testInput_f32[i];
349 in_neon[i] = testInput_f32[i];
350 }
351
352 GET_TIME
353 (
354 time_c,
355 {
356 for (k = 0; k < TEST_COUNT; k++)
357 {
358 for (block = 0; block < config->numFrames; block++)
359 {
360 ne10_iir_lattice_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), config->blockSize);
361 }
362 }
363 }
364 );
365
366#ifdef ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
367 GET_TIME
368 (
369 time_neon,
370 {
371 for (k = 0; k < TEST_COUNT; k++)
372 {
373 for (block = 0; block < config->numFrames; block++)
374 {
375 ne10_iir_lattice_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), config->blockSize);
376 }
377 }
378 }
379 );
380#endif // ENABLE_NE10_IIR_LATTICE_FLOAT_NEON
381
382 time_speedup = (ne10_float32_t) time_c / time_neon;
383 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
384 ne10_log (__FUNCTION__, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->numTaps, time_c, time_neon, time_savings, time_speedup);
385
386 }
387#endif
388
389 free (guarded_in_c);
390 free (guarded_in_neon);
391 free (guarded_out_c);
392 free (guarded_out_neon);
393 free (guarded_iir_state_c);
394 free (guarded_iir_state_neon);
395 fprintf (stdout, "----------%30s end\n", __FUNCTION__);
396}
397
/* Test entry point handed to run_test(); runs the single IIR lattice test case. */
void test_iir_lattice (void)
{
    test_iir_lattice_case0();
}
402
403static void my_test_setup (void)
404{
405 ne10_log_buffer_ptr = ne10_log_buffer;
406}
407
408void test_fixture_iir_lattice (void)
409{
410 test_fixture_start(); // starts a fixture
411
412 fixture_setup (my_test_setup);
413
414 run_test (test_iir_lattice); // run tests
415
416 test_fixture_end(); // ends a fixture
417}
ne10_result_t ne10_iir_lattice_init_float(ne10_iir_lattice_instance_f32_t *S, ne10_uint16_t numStages, ne10_float32_t *pkCoeffs, ne10_float32_t *pvCoeffs, ne10_float32_t *pState, ne10_uint32_t blockSize)
Initialization function for the floating-point IIR lattice filter.
void ne10_iir_lattice_float_c(const ne10_iir_lattice_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Processing function for the floating-point IIR lattice filter.
Definition NE10_iir.c:118
Instance structure for the floating point IIR Lattice filter.
Definition NE10_types.h:420