GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16ic_s32f_magnitude_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 143 143 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16ic_s32f_magnitude_32f
12 *
13 * \b Overview
14 *
15 * Computes the magnitude of each sample in complexVector after dividing
16 * it by scalar, and stores the results in magnitudeVector as floats.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_16ic_s32f_magnitude_32f(float* magnitudeVector, const lv_16sc_t*
21 * complexVector, const float scalar, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector of complex 16-bit shorts.
25 * \li scalar: The value by which each sample of the input complex vector is divided.
26 * \li num_points: The number of samples.
27 *
28 * \b Outputs
29 * \li magnitudeVector: The magnitude of the complex values.
30 *
31 * \b Example
32 * \code
33 * int N = 10000;
34 *
35 * volk_16ic_s32f_magnitude_32f();
36 *
37 * volk_free(x);
38 * volk_free(t);
39 * \endcode
40 */
41
42 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
43 #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
44
45 #include <inttypes.h>
46 #include <math.h>
47 #include <stdio.h>
48 #include <volk/volk_common.h>
49
50 #ifdef LV_HAVE_AVX2
51 #include <immintrin.h>
52
53 2 static inline void volk_16ic_s32f_magnitude_32f_a_avx2(float* magnitudeVector,
54 const lv_16sc_t* complexVector,
55 const float scalar,
56 unsigned int num_points)
57 {
58 2 unsigned int number = 0;
59 2 const unsigned int eighthPoints = num_points / 8;
60
61 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
62 2 float* magnitudeVectorPtr = magnitudeVector;
63
64 4 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
65
66 __m256 cplxValue1, cplxValue2, result;
67 __m256i int1, int2;
68 __m128i short1, short2;
69 2 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
70
71
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
72
73 32766 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
74 32766 complexVectorPtr += 16;
75 32766 short1 = _mm256_extracti128_si256(int1, 0);
76 32766 short2 = _mm256_extracti128_si256(int1, 1);
77
78 32766 int1 = _mm256_cvtepi16_epi32(short1);
79 32766 int2 = _mm256_cvtepi16_epi32(short2);
80 32766 cplxValue1 = _mm256_cvtepi32_ps(int1);
81 32766 cplxValue2 = _mm256_cvtepi32_ps(int2);
82
83 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
84 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
85
86 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
87 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
88
89 32766 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
90 32766 result = _mm256_permutevar8x32_ps(result, idx);
91
92 32766 result = _mm256_sqrt_ps(result); // Square root the values
93
94 _mm256_store_ps(magnitudeVectorPtr, result);
95
96 32766 magnitudeVectorPtr += 8;
97 }
98
99 2 number = eighthPoints * 8;
100 2 magnitudeVectorPtr = &magnitudeVector[number];
101 2 complexVectorPtr = (const int16_t*)&complexVector[number];
102
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
103 14 float val1Real = (float)(*complexVectorPtr++) / scalar;
104 14 float val1Imag = (float)(*complexVectorPtr++) / scalar;
105 14 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
106 }
107 2 }
108 #endif /* LV_HAVE_AVX2 */
109
110
111 #ifdef LV_HAVE_SSE3
112 #include <pmmintrin.h>
113
114 2 static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector,
115 const lv_16sc_t* complexVector,
116 const float scalar,
117 unsigned int num_points)
118 {
119 2 unsigned int number = 0;
120 2 const unsigned int quarterPoints = num_points / 4;
121
122 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
123 2 float* magnitudeVectorPtr = magnitudeVector;
124
125 2 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
126
127 __m128 cplxValue1, cplxValue2, result;
128
129 __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
130
131
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
132
133 65534 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
134 65534 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
135 65534 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
136 65534 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
137
138 65534 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
139 65534 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
140 65534 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
141 65534 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
142
143 65534 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
144 65534 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
145
146 65534 complexVectorPtr += 8;
147
148 65534 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
149 65534 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
150
151 65534 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
152 65534 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
153
154 65534 result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
155
156 65534 result = _mm_sqrt_ps(result); // Square root the values
157
158 _mm_store_ps(magnitudeVectorPtr, result);
159
160 65534 magnitudeVectorPtr += 4;
161 }
162
163 2 number = quarterPoints * 4;
164 2 magnitudeVectorPtr = &magnitudeVector[number];
165 2 complexVectorPtr = (const int16_t*)&complexVector[number];
166
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
167 6 float val1Real = (float)(*complexVectorPtr++) / scalar;
168 6 float val1Imag = (float)(*complexVectorPtr++) / scalar;
169 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
170 }
171 2 }
172 #endif /* LV_HAVE_SSE3 */
173
174 #ifdef LV_HAVE_SSE
175 #include <xmmintrin.h>
176
177 2 static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector,
178 const lv_16sc_t* complexVector,
179 const float scalar,
180 unsigned int num_points)
181 {
182 2 unsigned int number = 0;
183 2 const unsigned int quarterPoints = num_points / 4;
184
185 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
186 2 float* magnitudeVectorPtr = magnitudeVector;
187
188 2 const float iScalar = 1.0 / scalar;
189 2 __m128 invScalar = _mm_set_ps1(iScalar);
190
191 __m128 cplxValue1, cplxValue2, result, re, im;
192
193 __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
194
195
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
196 65534 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
197 65534 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
198 65534 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
199 65534 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
200
201 65534 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
202 65534 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
203 65534 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
204 65534 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
205
206 65534 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
207 65534 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
208
209 65534 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
210 65534 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
211
212 65534 complexVectorPtr += 8;
213
214 65534 cplxValue1 = _mm_mul_ps(re, invScalar);
215 65534 cplxValue2 = _mm_mul_ps(im, invScalar);
216
217 65534 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
218 65534 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
219
220 65534 result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
221
222 65534 result = _mm_sqrt_ps(result); // Square root the values
223
224 _mm_store_ps(magnitudeVectorPtr, result);
225
226 65534 magnitudeVectorPtr += 4;
227 }
228
229 2 number = quarterPoints * 4;
230 2 magnitudeVectorPtr = &magnitudeVector[number];
231 2 complexVectorPtr = (const int16_t*)&complexVector[number];
232
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
233 6 float val1Real = (float)(*complexVectorPtr++) * iScalar;
234 6 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
235 6 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
236 }
237 2 }
238
239
240 #endif /* LV_HAVE_SSE */
241
242 #ifdef LV_HAVE_GENERIC
243
244 2 static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
245 const lv_16sc_t* complexVector,
246 const float scalar,
247 unsigned int num_points)
248 {
249 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
250 2 float* magnitudeVectorPtr = magnitudeVector;
251 2 unsigned int number = 0;
252 2 const float invScalar = 1.0 / scalar;
253
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
254 262142 float real = ((float)(*complexVectorPtr++)) * invScalar;
255 262142 float imag = ((float)(*complexVectorPtr++)) * invScalar;
256 262142 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
257 }
258 2 }
259 #endif /* LV_HAVE_GENERIC */
260
261
262 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
263
264 #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
265 #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
266
267 #include <inttypes.h>
268 #include <math.h>
269 #include <stdio.h>
270 #include <volk/volk_common.h>
271
272 #ifdef LV_HAVE_AVX2
273 #include <immintrin.h>
274
275 2 static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector,
276 const lv_16sc_t* complexVector,
277 const float scalar,
278 unsigned int num_points)
279 {
280 2 unsigned int number = 0;
281 2 const unsigned int eighthPoints = num_points / 8;
282
283 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
284 2 float* magnitudeVectorPtr = magnitudeVector;
285
286 4 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
287
288 __m256 cplxValue1, cplxValue2, result;
289 __m256i int1, int2;
290 __m128i short1, short2;
291 2 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
292
293
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
294
295 32766 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
296 32766 complexVectorPtr += 16;
297 32766 short1 = _mm256_extracti128_si256(int1, 0);
298 32766 short2 = _mm256_extracti128_si256(int1, 1);
299
300 32766 int1 = _mm256_cvtepi16_epi32(short1);
301 32766 int2 = _mm256_cvtepi16_epi32(short2);
302 32766 cplxValue1 = _mm256_cvtepi32_ps(int1);
303 32766 cplxValue2 = _mm256_cvtepi32_ps(int2);
304
305 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
306 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
307
308 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
309 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
310
311 32766 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
312 32766 result = _mm256_permutevar8x32_ps(result, idx);
313
314 32766 result = _mm256_sqrt_ps(result); // Square root the values
315
316 _mm256_storeu_ps(magnitudeVectorPtr, result);
317
318 32766 magnitudeVectorPtr += 8;
319 }
320
321 2 number = eighthPoints * 8;
322 2 magnitudeVectorPtr = &magnitudeVector[number];
323 2 complexVectorPtr = (const int16_t*)&complexVector[number];
324
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
325 14 float val1Real = (float)(*complexVectorPtr++) / scalar;
326 14 float val1Imag = (float)(*complexVectorPtr++) / scalar;
327 14 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
328 }
329 2 }
330 #endif /* LV_HAVE_AVX2 */
331
332 #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */
333