GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_magnitude_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 125 125 100.0%
Functions: 8 8 100.0%
Branches: 28 28 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_magnitude_32f
12 *
13 * \b Overview
14 *
15 * Calculates the magnitude of the complexVector and stores the
16 * results in the magnitudeVector.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_magnitude_32f(float* magnitudeVector, const lv_32fc_t* complexVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of samples.
26 *
27 * \b Outputs
28 * \li magnitudeVector: The output value.
29 *
30 * \b Example
31 * Calculate the magnitude of \f$x^2 + x\f$ for points around the unit circle.
32 * \code
33 * int N = 10;
34 * unsigned int alignment = volk_get_alignment();
35 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
36 * float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
37 *
38 * for(unsigned int ii = 0; ii < N/2; ++ii){
39 * float real = 2.f * ((float)ii / (float)N) - 1.f;
40 * float imag = std::sqrt(1.f - real * real);
41 * in[ii] = lv_cmake(real, imag);
42 * in[ii] = in[ii] * in[ii] + in[ii];
43 * in[N-ii] = lv_cmake(real, imag);
44 * in[N-ii] = in[N-ii] * in[N-ii] + in[N-ii];
45 * }
46 *
47 * volk_32fc_magnitude_32f(magnitude, in, N);
48 *
49 * for(unsigned int ii = 0; ii < N; ++ii){
50 * printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
51 * }
52 *
53 * volk_free(in);
54 * volk_free(magnitude);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
59 #define INCLUDED_volk_32fc_magnitude_32f_u_H
60
61 #include <inttypes.h>
62 #include <math.h>
63 #include <stdio.h>
64
65 #ifdef LV_HAVE_AVX
66 #include <immintrin.h>
67 #include <volk/volk_avx_intrinsics.h>
68
69 2 static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
70 const lv_32fc_t* complexVector,
71 unsigned int num_points)
72 {
73 2 unsigned int number = 0;
74 2 const unsigned int eighthPoints = num_points / 8;
75
76 2 const float* complexVectorPtr = (float*)complexVector;
77 2 float* magnitudeVectorPtr = magnitudeVector;
78
79 __m256 cplxValue1, cplxValue2, result;
80
81
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
82 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
84 32766 result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
85 _mm256_storeu_ps(magnitudeVectorPtr, result);
86
87 32766 complexVectorPtr += 16;
88 32766 magnitudeVectorPtr += 8;
89 }
90
91 2 number = eighthPoints * 8;
92
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
93 14 float val1Real = *complexVectorPtr++;
94 14 float val1Imag = *complexVectorPtr++;
95 14 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
96 }
97 2 }
98 #endif /* LV_HAVE_AVX */
99
100 #ifdef LV_HAVE_SSE3
101 #include <pmmintrin.h>
102 #include <volk/volk_sse3_intrinsics.h>
103
104 2 static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
105 const lv_32fc_t* complexVector,
106 unsigned int num_points)
107 {
108 2 unsigned int number = 0;
109 2 const unsigned int quarterPoints = num_points / 4;
110
111 2 const float* complexVectorPtr = (float*)complexVector;
112 2 float* magnitudeVectorPtr = magnitudeVector;
113
114 __m128 cplxValue1, cplxValue2, result;
115
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
116 65534 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
117 65534 complexVectorPtr += 4;
118
119 65534 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
120 65534 complexVectorPtr += 4;
121
122 65534 result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
123
124 _mm_storeu_ps(magnitudeVectorPtr, result);
125 65534 magnitudeVectorPtr += 4;
126 }
127
128 2 number = quarterPoints * 4;
129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
130 6 float val1Real = *complexVectorPtr++;
131 6 float val1Imag = *complexVectorPtr++;
132 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
133 }
134 2 }
135 #endif /* LV_HAVE_SSE3 */
136
137
138 #ifdef LV_HAVE_SSE
139 #include <volk/volk_sse_intrinsics.h>
140 #include <xmmintrin.h>
141
142 2 static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
143 const lv_32fc_t* complexVector,
144 unsigned int num_points)
145 {
146 2 unsigned int number = 0;
147 2 const unsigned int quarterPoints = num_points / 4;
148
149 2 const float* complexVectorPtr = (float*)complexVector;
150 2 float* magnitudeVectorPtr = magnitudeVector;
151
152 __m128 cplxValue1, cplxValue2, result;
153
154
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
155 65534 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156 65534 complexVectorPtr += 4;
157
158 65534 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159 65534 complexVectorPtr += 4;
160
161 65534 result = _mm_magnitude_ps(cplxValue1, cplxValue2);
162 _mm_storeu_ps(magnitudeVectorPtr, result);
163 65534 magnitudeVectorPtr += 4;
164 }
165
166 2 number = quarterPoints * 4;
167
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
168 6 float val1Real = *complexVectorPtr++;
169 6 float val1Imag = *complexVectorPtr++;
170 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
171 }
172 2 }
173 #endif /* LV_HAVE_SSE */
174
175
176 #ifdef LV_HAVE_GENERIC
177
178 2 static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
179 const lv_32fc_t* complexVector,
180 unsigned int num_points)
181 {
182 2 const float* complexVectorPtr = (float*)complexVector;
183 2 float* magnitudeVectorPtr = magnitudeVector;
184 2 unsigned int number = 0;
185
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
186 262142 const float real = *complexVectorPtr++;
187 262142 const float imag = *complexVectorPtr++;
188 262142 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
189 }
190 2 }
191 #endif /* LV_HAVE_GENERIC */
192
193
194 #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
195 #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
196 #define INCLUDED_volk_32fc_magnitude_32f_a_H
197
198 #include <inttypes.h>
199 #include <math.h>
200 #include <stdio.h>
201
202 #ifdef LV_HAVE_AVX
203 #include <immintrin.h>
204 #include <volk/volk_avx_intrinsics.h>
205
206 2 static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
207 const lv_32fc_t* complexVector,
208 unsigned int num_points)
209 {
210 2 unsigned int number = 0;
211 2 const unsigned int eighthPoints = num_points / 8;
212
213 2 const float* complexVectorPtr = (float*)complexVector;
214 2 float* magnitudeVectorPtr = magnitudeVector;
215
216 __m256 cplxValue1, cplxValue2, result;
217
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
218 32766 cplxValue1 = _mm256_load_ps(complexVectorPtr);
219 32766 complexVectorPtr += 8;
220
221 32766 cplxValue2 = _mm256_load_ps(complexVectorPtr);
222 32766 complexVectorPtr += 8;
223
224 32766 result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
225 _mm256_store_ps(magnitudeVectorPtr, result);
226 32766 magnitudeVectorPtr += 8;
227 }
228
229 2 number = eighthPoints * 8;
230
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
231 14 float val1Real = *complexVectorPtr++;
232 14 float val1Imag = *complexVectorPtr++;
233 14 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
234 }
235 2 }
236 #endif /* LV_HAVE_AVX */
237
238 #ifdef LV_HAVE_SSE3
239 #include <pmmintrin.h>
240 #include <volk/volk_sse3_intrinsics.h>
241
242 2 static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
243 const lv_32fc_t* complexVector,
244 unsigned int num_points)
245 {
246 2 unsigned int number = 0;
247 2 const unsigned int quarterPoints = num_points / 4;
248
249 2 const float* complexVectorPtr = (float*)complexVector;
250 2 float* magnitudeVectorPtr = magnitudeVector;
251
252 __m128 cplxValue1, cplxValue2, result;
253
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
254 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
255 65534 complexVectorPtr += 4;
256
257 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
258 65534 complexVectorPtr += 4;
259
260 65534 result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
261 _mm_store_ps(magnitudeVectorPtr, result);
262 65534 magnitudeVectorPtr += 4;
263 }
264
265 2 number = quarterPoints * 4;
266
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
267 6 float val1Real = *complexVectorPtr++;
268 6 float val1Imag = *complexVectorPtr++;
269 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
270 }
271 2 }
272 #endif /* LV_HAVE_SSE3 */
273
274 #ifdef LV_HAVE_SSE
275 #include <volk/volk_sse_intrinsics.h>
276 #include <xmmintrin.h>
277
278 2 static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
279 const lv_32fc_t* complexVector,
280 unsigned int num_points)
281 {
282 2 unsigned int number = 0;
283 2 const unsigned int quarterPoints = num_points / 4;
284
285 2 const float* complexVectorPtr = (float*)complexVector;
286 2 float* magnitudeVectorPtr = magnitudeVector;
287
288 __m128 cplxValue1, cplxValue2, result;
289
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
290 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
291 65534 complexVectorPtr += 4;
292
293 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
294 65534 complexVectorPtr += 4;
295
296 65534 result = _mm_magnitude_ps(cplxValue1, cplxValue2);
297 _mm_store_ps(magnitudeVectorPtr, result);
298 65534 magnitudeVectorPtr += 4;
299 }
300
301 2 number = quarterPoints * 4;
302
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
303 6 float val1Real = *complexVectorPtr++;
304 6 float val1Imag = *complexVectorPtr++;
305 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
306 }
307 2 }
308 #endif /* LV_HAVE_SSE */
309
310
311 #ifdef LV_HAVE_GENERIC
312
313 2 static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
314 const lv_32fc_t* complexVector,
315 unsigned int num_points)
316 {
317 2 const float* complexVectorPtr = (float*)complexVector;
318 2 float* magnitudeVectorPtr = magnitudeVector;
319 2 unsigned int number = 0;
320
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
321 262142 const float real = *complexVectorPtr++;
322 262142 const float imag = *complexVectorPtr++;
323 262142 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
324 }
325 2 }
326 #endif /* LV_HAVE_GENERIC */
327
328
329 #ifdef LV_HAVE_NEON
330 #include <arm_neon.h>
331
332 static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
333 const lv_32fc_t* complexVector,
334 unsigned int num_points)
335 {
336 unsigned int number;
337 unsigned int quarter_points = num_points / 4;
338 const float* complexVectorPtr = (float*)complexVector;
339 float* magnitudeVectorPtr = magnitudeVector;
340
341 float32x4x2_t complex_vec;
342 float32x4_t magnitude_vec;
343 for (number = 0; number < quarter_points; number++) {
344 complex_vec = vld2q_f32(complexVectorPtr);
345 complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
346 magnitude_vec =
347 vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
348 magnitude_vec = vrsqrteq_f32(magnitude_vec);
349 magnitude_vec = vrecpeq_f32(magnitude_vec); // no plain ol' sqrt
350 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
351
352 complexVectorPtr += 8;
353 magnitudeVectorPtr += 4;
354 }
355
356 for (number = quarter_points * 4; number < num_points; number++) {
357 const float real = *complexVectorPtr++;
358 const float imag = *complexVectorPtr++;
359 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
360 }
361 }
362 #endif /* LV_HAVE_NEON */
363
364
365 #ifdef LV_HAVE_NEON
366 /*!
367 \brief Calculates the magnitude of the complexVector and stores the results in the
368 magnitudeVector
369
370 This is an approximation from "Streamlining Digital Signal Processing" by
371 Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
372 The basic idea is to do a weighted sum of the abs. value of imag and real parts
373 where weight A is always assigned to max(imag, real) and B is always min(imag,real).
374 There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
375 This method is called equiripple-error magnitude estimation proposed by Filip in '73
376
377 \param complexVector The vector containing the complex input values
378 \param magnitudeVector The vector containing the real output values
379 \param num_points The number of complex values in complexVector to be calculated and
380 stored into cVector
381 */
382 static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(
383 float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
384 {
385 unsigned int number;
386 unsigned int quarter_points = num_points / 4;
387 const float* complexVectorPtr = (float*)complexVector;
388 float* magnitudeVectorPtr = magnitudeVector;
389
390 const float threshold = 0.4142135;
391
392 float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
393 a_high = vdupq_n_f32(0.84);
394 b_high = vdupq_n_f32(0.561);
395 a_low = vdupq_n_f32(0.99);
396 b_low = vdupq_n_f32(0.197);
397
398 uint32x4_t comp0, comp1;
399
400 float32x4x2_t complex_vec;
401 float32x4_t min_vec, max_vec, magnitude_vec;
402 float32x4_t real_abs, imag_abs;
403 for (number = 0; number < quarter_points; number++) {
404 complex_vec = vld2q_f32(complexVectorPtr);
405
406 real_abs = vabsq_f32(complex_vec.val[0]);
407 imag_abs = vabsq_f32(complex_vec.val[1]);
408
409 min_vec = vminq_f32(real_abs, imag_abs);
410 max_vec = vmaxq_f32(real_abs, imag_abs);
411
412 // effective branch to choose coefficient pair.
413 comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
414 comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
415
416 // and 0s or 1s with coefficients from previous effective branch
417 a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
418 vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
419 b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
420 vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
421
422 // coefficients chosen, do the weighted sum
423 min_vec = vmulq_f32(min_vec, b_vec);
424 max_vec = vmulq_f32(max_vec, a_vec);
425
426 magnitude_vec = vaddq_f32(min_vec, max_vec);
427 vst1q_f32(magnitudeVectorPtr, magnitude_vec);
428
429 complexVectorPtr += 8;
430 magnitudeVectorPtr += 4;
431 }
432
433 for (number = quarter_points * 4; number < num_points; number++) {
434 const float real = *complexVectorPtr++;
435 const float imag = *complexVectorPtr++;
436 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
437 }
438 }
439 #endif /* LV_HAVE_NEON */
440
441
442 #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
443