GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_s32f_magnitude_16i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 115 115 100.0%
Functions: 5 5 100.0%
Branches: 10 10 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_s32f_magnitude_16i
12 *
13 * \b Overview
14 *
15 * Calculates the magnitude of the complexVector and stores the
16 * results in the magnitudeVector. Each magnitude is multiplied by scalar,
17 * rounded to an integer, and converted into a 16-bit short.
18 *
19 * <b>Dispatcher Prototype</b>
20 * \code
21 * void volk_32fc_s32f_magnitude_16i(int16_t* magnitudeVector, const lv_32fc_t*
22 * complexVector, const float scalar, unsigned int num_points) \endcode
23 *
24 * \b Inputs
25 * \li complexVector: The complex input vector.
26 * \li num_points: The number of samples.
27 *
28 * \b Outputs
29 * \li magnitudeVector: The output value as 16-bit shorts.
30 *
31 * \b Example
32 * Generate points around the unit circle and map them to integers with
33 * magnitude 50 to preserve smallest deltas.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38 * int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
39 * float scale = 50.f;
40 *
41 * for(unsigned int ii = 0; ii < N/2; ++ii){
42 * // Generate points around the unit circle
43 * float real = -4.f * ((float)ii / (float)N) + 1.f;
44 * float imag = std::sqrt(1.f - real * real);
45 * in[ii] = lv_cmake(real, imag);
46 * in[ii+N/2] = lv_cmake(-real, -imag);
47 * }
48 *
49 * volk_32fc_s32f_magnitude_16i(out, in, scale, N);
50 *
51 * for(unsigned int ii = 0; ii < N; ++ii){
52 * printf("out[%u] = %i\n", ii, out[ii]);
53 * }
54 *
55 * volk_free(in);
56 * volk_free(out);
57 * \endcode
58 */
59
60 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
61 #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
62
63 #include <inttypes.h>
64 #include <math.h>
65 #include <stdio.h>
66 #include <volk/volk_common.h>
67
68 #ifdef LV_HAVE_GENERIC
69
70 10 static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector,
71 const lv_32fc_t* complexVector,
72 const float scalar,
73 unsigned int num_points)
74 {
75 10 const float* complexVectorPtr = (float*)complexVector;
76 10 int16_t* magnitudeVectorPtr = magnitudeVector;
77 10 unsigned int number = 0;
78
2/2
✓ Branch 0 taken 262182 times.
✓ Branch 1 taken 10 times.
262192 for (number = 0; number < num_points; number++) {
79 262182 __VOLK_VOLATILE float real = *complexVectorPtr++;
80 262182 __VOLK_VOLATILE float imag = *complexVectorPtr++;
81 262182 real *= real;
82 262182 imag *= imag;
83 262182 *magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
84 }
85 10 }
86 #endif /* LV_HAVE_GENERIC */
87
88 #ifdef LV_HAVE_AVX2
89 #include <immintrin.h>
90
91 2 static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
92 const lv_32fc_t* complexVector,
93 const float scalar,
94 unsigned int num_points)
95 {
96 2 unsigned int number = 0;
97 2 const unsigned int eighthPoints = num_points / 8;
98
99 2 const float* complexVectorPtr = (const float*)complexVector;
100 2 int16_t* magnitudeVectorPtr = magnitudeVector;
101
102 2 __m256 vScalar = _mm256_set1_ps(scalar);
103 2 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
104 __m256 cplxValue1, cplxValue2, result;
105 __m256i resultInt;
106 __m128i resultShort;
107
108
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
109 32766 cplxValue1 = _mm256_load_ps(complexVectorPtr);
110 32766 complexVectorPtr += 8;
111
112 32766 cplxValue2 = _mm256_load_ps(complexVectorPtr);
113 32766 complexVectorPtr += 8;
114
115 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
116 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
117
118 32766 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
119
120 32766 result = _mm256_sqrt_ps(result);
121
122 32766 result = _mm256_mul_ps(result, vScalar);
123
124 32766 resultInt = _mm256_cvtps_epi32(result);
125 32766 resultInt = _mm256_packs_epi32(resultInt, resultInt);
126 32766 resultInt = _mm256_permutevar8x32_epi32(
127 resultInt, idx); // permute to compensate for shuffling in hadd and packs
128 32766 resultShort = _mm256_extracti128_si256(resultInt, 0);
129 _mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
130 32766 magnitudeVectorPtr += 8;
131 }
132
133 2 number = eighthPoints * 8;
134 2 volk_32fc_s32f_magnitude_16i_generic(
135 2 magnitudeVector + number, complexVector + number, scalar, num_points - number);
136 2 }
137 #endif /* LV_HAVE_AVX2 */
138
139 #ifdef LV_HAVE_SSE3
140 #include <pmmintrin.h>
141
142 2 static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
143 const lv_32fc_t* complexVector,
144 const float scalar,
145 unsigned int num_points)
146 {
147 2 unsigned int number = 0;
148 2 const unsigned int quarterPoints = num_points / 4;
149
150 2 const float* complexVectorPtr = (const float*)complexVector;
151 2 int16_t* magnitudeVectorPtr = magnitudeVector;
152
153 2 __m128 vScalar = _mm_set_ps1(scalar);
154
155 __m128 cplxValue1, cplxValue2, result;
156
157 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
158
159
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
160 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
161 65534 complexVectorPtr += 4;
162
163 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
164 65534 complexVectorPtr += 4;
165
166 65534 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
167 65534 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
168
169 65534 result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
170
171 65534 result = _mm_sqrt_ps(result);
172
173 65534 result = _mm_mul_ps(result, vScalar);
174
175 _mm_store_ps(floatBuffer, result);
176 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
177 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
178 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
179 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
180 }
181
182 2 number = quarterPoints * 4;
183 2 volk_32fc_s32f_magnitude_16i_generic(
184 2 magnitudeVector + number, complexVector + number, scalar, num_points - number);
185 2 }
186 #endif /* LV_HAVE_SSE3 */
187
188
189 #ifdef LV_HAVE_SSE
190 #include <xmmintrin.h>
191
192 2 static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
193 const lv_32fc_t* complexVector,
194 const float scalar,
195 unsigned int num_points)
196 {
197 2 unsigned int number = 0;
198 2 const unsigned int quarterPoints = num_points / 4;
199
200 2 const float* complexVectorPtr = (const float*)complexVector;
201 2 int16_t* magnitudeVectorPtr = magnitudeVector;
202
203 2 __m128 vScalar = _mm_set_ps1(scalar);
204
205 __m128 cplxValue1, cplxValue2, result;
206 __m128 iValue, qValue;
207
208 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
209
210
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
211 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
212 65534 complexVectorPtr += 4;
213
214 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
215 65534 complexVectorPtr += 4;
216
217 // Arrange in i1i2i3i4 format
218 65534 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
219 // Arrange in q1q2q3q4 format
220 65534 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
221
222 65534 __VOLK_VOLATILE __m128 iValue2 =
223 65534 _mm_mul_ps(iValue, iValue); // Square the I values
224 65534 __VOLK_VOLATILE __m128 qValue2 =
225 65534 _mm_mul_ps(qValue, qValue); // Square the Q Values
226
227 131068 result = _mm_add_ps(iValue2, qValue2); // Add the I2 and Q2 values
228
229 65534 result = _mm_sqrt_ps(result);
230
231 65534 result = _mm_mul_ps(result, vScalar);
232
233 _mm_store_ps(floatBuffer, result);
234 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
235 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
236 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
237 65534 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
238 }
239
240 2 number = quarterPoints * 4;
241 2 volk_32fc_s32f_magnitude_16i_generic(
242 2 magnitudeVector + number, complexVector + number, scalar, num_points - number);
243 2 }
244 #endif /* LV_HAVE_SSE */
245
246
247 #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
248
249 #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
250 #define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
251
252 #include <inttypes.h>
253 #include <math.h>
254 #include <stdio.h>
255 #include <volk/volk_common.h>
256
257 #ifdef LV_HAVE_AVX2
258 #include <immintrin.h>
259
260 2 static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
261 const lv_32fc_t* complexVector,
262 const float scalar,
263 unsigned int num_points)
264 {
265 2 unsigned int number = 0;
266 2 const unsigned int eighthPoints = num_points / 8;
267
268 2 const float* complexVectorPtr = (const float*)complexVector;
269 2 int16_t* magnitudeVectorPtr = magnitudeVector;
270
271 2 __m256 vScalar = _mm256_set1_ps(scalar);
272 2 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
273 __m256 cplxValue1, cplxValue2, result;
274 __m256i resultInt;
275 __m128i resultShort;
276
277
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
278 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
279 32766 complexVectorPtr += 8;
280
281 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
282 32766 complexVectorPtr += 8;
283
284 32766 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
285 32766 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
286
287 32766 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
288
289 32766 result = _mm256_sqrt_ps(result);
290
291 32766 result = _mm256_mul_ps(result, vScalar);
292
293 32766 resultInt = _mm256_cvtps_epi32(result);
294 32766 resultInt = _mm256_packs_epi32(resultInt, resultInt);
295 32766 resultInt = _mm256_permutevar8x32_epi32(
296 resultInt, idx); // permute to compensate for shuffling in hadd and packs
297 32766 resultShort = _mm256_extracti128_si256(resultInt, 0);
298 _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
299 32766 magnitudeVectorPtr += 8;
300 }
301
302 2 number = eighthPoints * 8;
303 2 volk_32fc_s32f_magnitude_16i_generic(
304 2 magnitudeVector + number, complexVector + number, scalar, num_points - number);
305 2 }
306 #endif /* LV_HAVE_AVX2 */
307
308 #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
309