GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_magnitude_squared_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 125 125 100.0%
Functions: 8 8 100.0%
Branches: 28 28 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_magnitude_squared_32f
12 *
13 * \b Overview
14 *
15 * Calculates the magnitude squared of the complexVector and stores
16 * the results in the magnitudeVector.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_magnitude_squared_32f(float* magnitudeVector, const lv_32fc_t*
21 * complexVector, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of samples.
26 *
27 * \b Outputs
28 * \li magnitudeVector: The output vector of magnitude squared values.
29 *
30 * \b Example
31 * Calculate the magnitude squared of \f$x^2 + x\f$ for points around the unit circle.
32 * \code
33 * int N = 10;
34 * unsigned int alignment = volk_get_alignment();
35 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
36 * float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
37 *
38 * for(unsigned int ii = 0; ii < N/2; ++ii){
39 * float real = 2.f * ((float)ii / (float)N) - 1.f;
40 * float imag = std::sqrt(1.f - real * real);
41 * in[ii] = lv_cmake(real, imag);
42 * in[ii] = in[ii] * in[ii] + in[ii];
43 * in[N-ii-1] = lv_cmake(real, imag);
44 * in[N-ii-1] = in[N-ii-1] * in[N-ii-1] + in[N-ii-1];
45 * }
46 *
47 * volk_32fc_magnitude_squared_32f(magnitude, in, N);
48 *
49 * for(unsigned int ii = 0; ii < N; ++ii){
50 * printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
51 * }
52 *
53 * volk_free(in);
54 * volk_free(magnitude);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
59 #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
60
61 #include <inttypes.h>
62 #include <math.h>
63 #include <stdio.h>
64
65 #ifdef LV_HAVE_AVX
66 #include <immintrin.h>
67 #include <volk/volk_avx_intrinsics.h>
68
69 2 static inline void volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector,
70 const lv_32fc_t* complexVector,
71 unsigned int num_points)
72 {
73 2 unsigned int number = 0;
74 2 const unsigned int eighthPoints = num_points / 8;
75
76 2 const float* complexVectorPtr = (float*)complexVector;
77 2 float* magnitudeVectorPtr = magnitudeVector;
78
79 __m256 cplxValue1, cplxValue2, result;
80
81
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
82 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
84 32766 result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
85 _mm256_storeu_ps(magnitudeVectorPtr, result);
86
87 32766 complexVectorPtr += 16;
88 32766 magnitudeVectorPtr += 8;
89 }
90
91 2 number = eighthPoints * 8;
92
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
93 14 float val1Real = *complexVectorPtr++;
94 14 float val1Imag = *complexVectorPtr++;
95 14 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
96 }
97 2 }
98 #endif /* LV_HAVE_AVX */
99
100
101 #ifdef LV_HAVE_SSE3
102 #include <pmmintrin.h>
103 #include <volk/volk_sse3_intrinsics.h>
104
105 2 static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector,
106 const lv_32fc_t* complexVector,
107 unsigned int num_points)
108 {
109 2 unsigned int number = 0;
110 2 const unsigned int quarterPoints = num_points / 4;
111
112 2 const float* complexVectorPtr = (float*)complexVector;
113 2 float* magnitudeVectorPtr = magnitudeVector;
114
115 __m128 cplxValue1, cplxValue2, result;
116
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
117 65534 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
118 65534 complexVectorPtr += 4;
119
120 65534 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
121 65534 complexVectorPtr += 4;
122
123 65534 result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
124 _mm_storeu_ps(magnitudeVectorPtr, result);
125 65534 magnitudeVectorPtr += 4;
126 }
127
128 2 number = quarterPoints * 4;
129
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
130 6 float val1Real = *complexVectorPtr++;
131 6 float val1Imag = *complexVectorPtr++;
132 6 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
133 }
134 2 }
135 #endif /* LV_HAVE_SSE3 */
136
137
138 #ifdef LV_HAVE_SSE
139 #include <volk/volk_sse_intrinsics.h>
140 #include <xmmintrin.h>
141
142 2 static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector,
143 const lv_32fc_t* complexVector,
144 unsigned int num_points)
145 {
146 2 unsigned int number = 0;
147 2 const unsigned int quarterPoints = num_points / 4;
148
149 2 const float* complexVectorPtr = (float*)complexVector;
150 2 float* magnitudeVectorPtr = magnitudeVector;
151
152 __m128 cplxValue1, cplxValue2, result;
153
154
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
155 65534 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156 65534 complexVectorPtr += 4;
157
158 65534 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159 65534 complexVectorPtr += 4;
160
161 65534 result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
162 _mm_storeu_ps(magnitudeVectorPtr, result);
163 65534 magnitudeVectorPtr += 4;
164 }
165
166 2 number = quarterPoints * 4;
167
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
168 6 float val1Real = *complexVectorPtr++;
169 6 float val1Imag = *complexVectorPtr++;
170 6 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
171 }
172 2 }
173 #endif /* LV_HAVE_SSE */
174
175
176 #ifdef LV_HAVE_GENERIC
177
178 2 static inline void volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector,
179 const lv_32fc_t* complexVector,
180 unsigned int num_points)
181 {
182 2 const float* complexVectorPtr = (float*)complexVector;
183 2 float* magnitudeVectorPtr = magnitudeVector;
184 2 unsigned int number = 0;
185
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
186 262142 const float real = *complexVectorPtr++;
187 262142 const float imag = *complexVectorPtr++;
188 262142 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
189 }
190 2 }
191 #endif /* LV_HAVE_GENERIC */
192
193
194 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
195 #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
196 #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
197
198 #include <inttypes.h>
199 #include <math.h>
200 #include <stdio.h>
201
202 #ifdef LV_HAVE_AVX
203 #include <immintrin.h>
204 #include <volk/volk_avx_intrinsics.h>
205
206 4 static inline void volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector,
207 const lv_32fc_t* complexVector,
208 unsigned int num_points)
209 {
210 4 unsigned int number = 0;
211 4 const unsigned int eighthPoints = num_points / 8;
212
213 4 const float* complexVectorPtr = (float*)complexVector;
214 4 float* magnitudeVectorPtr = magnitudeVector;
215
216 __m256 cplxValue1, cplxValue2, result;
217
2/2
✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 4 times.
65536 for (; number < eighthPoints; number++) {
218 65532 cplxValue1 = _mm256_load_ps(complexVectorPtr);
219 65532 complexVectorPtr += 8;
220
221 65532 cplxValue2 = _mm256_load_ps(complexVectorPtr);
222 65532 complexVectorPtr += 8;
223
224 65532 result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
225 _mm256_store_ps(magnitudeVectorPtr, result);
226 65532 magnitudeVectorPtr += 8;
227 }
228
229 4 number = eighthPoints * 8;
230
2/2
✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.
32 for (; number < num_points; number++) {
231 28 float val1Real = *complexVectorPtr++;
232 28 float val1Imag = *complexVectorPtr++;
233 28 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
234 }
235 4 }
236 #endif /* LV_HAVE_AVX */
237
238
239 #ifdef LV_HAVE_SSE3
240 #include <pmmintrin.h>
241 #include <volk/volk_sse3_intrinsics.h>
242
243 2 static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector,
244 const lv_32fc_t* complexVector,
245 unsigned int num_points)
246 {
247 2 unsigned int number = 0;
248 2 const unsigned int quarterPoints = num_points / 4;
249
250 2 const float* complexVectorPtr = (float*)complexVector;
251 2 float* magnitudeVectorPtr = magnitudeVector;
252
253 __m128 cplxValue1, cplxValue2, result;
254
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
255 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
256 65534 complexVectorPtr += 4;
257
258 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
259 65534 complexVectorPtr += 4;
260
261 65534 result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
262 _mm_store_ps(magnitudeVectorPtr, result);
263 65534 magnitudeVectorPtr += 4;
264 }
265
266 2 number = quarterPoints * 4;
267
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
268 6 float val1Real = *complexVectorPtr++;
269 6 float val1Imag = *complexVectorPtr++;
270 6 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
271 }
272 2 }
273 #endif /* LV_HAVE_SSE3 */
274
275
276 #ifdef LV_HAVE_SSE
277 #include <volk/volk_sse_intrinsics.h>
278 #include <xmmintrin.h>
279
280 2 static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector,
281 const lv_32fc_t* complexVector,
282 unsigned int num_points)
283 {
284 2 unsigned int number = 0;
285 2 const unsigned int quarterPoints = num_points / 4;
286
287 2 const float* complexVectorPtr = (float*)complexVector;
288 2 float* magnitudeVectorPtr = magnitudeVector;
289
290 __m128 cplxValue1, cplxValue2, result;
291
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
292 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
293 65534 complexVectorPtr += 4;
294
295 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
296 65534 complexVectorPtr += 4;
297
298 65534 result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
299 _mm_store_ps(magnitudeVectorPtr, result);
300 65534 magnitudeVectorPtr += 4;
301 }
302
303 2 number = quarterPoints * 4;
304
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
305 6 float val1Real = *complexVectorPtr++;
306 6 float val1Imag = *complexVectorPtr++;
307 6 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
308 }
309 2 }
310 #endif /* LV_HAVE_SSE */
311
312
313 #ifdef LV_HAVE_NEON
314 #include <arm_neon.h>
315
316 static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
317 const lv_32fc_t* complexVector,
318 unsigned int num_points)
319 {
320 unsigned int number = 0;
321 const unsigned int quarterPoints = num_points / 4;
322
323 const float* complexVectorPtr = (float*)complexVector;
324 float* magnitudeVectorPtr = magnitudeVector;
325
326 float32x4x2_t cmplx_val;
327 float32x4_t result;
328 for (; number < quarterPoints; number++) {
329 cmplx_val = vld2q_f32(complexVectorPtr);
330 complexVectorPtr += 8;
331
332 cmplx_val.val[0] =
333 vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
334 cmplx_val.val[1] =
335 vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
336
337 result =
338 vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
339
340 vst1q_f32(magnitudeVectorPtr, result);
341 magnitudeVectorPtr += 4;
342 }
343
344 number = quarterPoints * 4;
345 for (; number < num_points; number++) {
346 float val1Real = *complexVectorPtr++;
347 float val1Imag = *complexVectorPtr++;
348 *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
349 }
350 }
351 #endif /* LV_HAVE_NEON */
352
353
354 #ifdef LV_HAVE_GENERIC
355
356 2 static inline void volk_32fc_magnitude_squared_32f_a_generic(
357 float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
358 {
359 2 const float* complexVectorPtr = (float*)complexVector;
360 2 float* magnitudeVectorPtr = magnitudeVector;
361 2 unsigned int number = 0;
362
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
363 262142 const float real = *complexVectorPtr++;
364 262142 const float imag = *complexVectorPtr++;
365 262142 *magnitudeVectorPtr++ = (real * real) + (imag * imag);
366 }
367 2 }
368 #endif /* LV_HAVE_GENERIC */
369
370 #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
371