GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 104 104 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16ic_s32f_deinterleave_real_32f
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex 16-bit vector and returns just the real
16 * (in-phase) part of the data as a vector of floats, with each
17 * sample divided by scalar.
18 *
19 * <b>Dispatcher Prototype</b>
20 * \code
21 * void volk_16ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_16sc_t*
22 * complexVector, const float scalar, unsigned int num_points); \endcode
23 *
24 * \b Inputs
25 * \li complexVector: The complex input vector of 16-bit shorts.
26 * \li scalar: The value by which each sample of the input complex vector is divided.
27 * \li num_points: The number of complex data values to be deinterleaved.
28 *
29 * \b Outputs
30 * \li iBuffer: The floating point I buffer output data.
31 *
32 * \b Example
33 * \code
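34 * int N = 10000;
35 * lv_16sc_t* x = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t) * N, volk_get_alignment());
36 * float* t = (float*)volk_malloc(sizeof(float) * N, volk_get_alignment());
37 * volk_16ic_s32f_deinterleave_real_32f(t, x, 32768.f, N); // fill x with samples first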
38 * volk_free(x);
39 * volk_free(t);
40 * \endcode
41 */
42
43 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
44 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
45
46 #include <inttypes.h>
47 #include <stdio.h>
48 #include <volk/volk_common.h>
49
50 #ifdef LV_HAVE_AVX2
51 #include <immintrin.h>
52
53 static inline void
54 2 volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
55 const lv_16sc_t* complexVector,
56 const float scalar,
57 unsigned int num_points)
58 {
59 2 float* iBufferPtr = iBuffer;
60
61 2 unsigned int number = 0;
62 2 const unsigned int eighthPoints = num_points / 8;
63
64 __m256 iFloatValue;
65
66 2 const float iScalar = 1.0 / scalar;
67 2 __m256 invScalar = _mm256_set1_ps(iScalar);
68 __m256i complexVal, iIntVal;
69 __m128i complexVal128;
70 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
71
72 2 __m256i moveMask = _mm256_set_epi8(0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 13,
81 12,
82 9,
83 8,
84 5,
85 4,
86 1,
87 0,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 0x80,
94 0x80,
95 0x80,
96 13,
97 12,
98 9,
99 8,
100 5,
101 4,
102 1,
103 0);
104
105
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
106 32766 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
107 32766 complexVectorPtr += 32;
108 32766 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
109 32766 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
110 32766 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
111
112 32766 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
113 32766 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
114
115 32766 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116
117 _mm256_store_ps(iBufferPtr, iFloatValue);
118
119 32766 iBufferPtr += 8;
120 }
121
122 2 number = eighthPoints * 8;
123 2 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
124
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
125 14 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
126 14 sixteenTComplexVectorPtr++;
127 }
128 2 }
129 #endif /* LV_HAVE_AVX2 */
130
131 #ifdef LV_HAVE_SSE4_1
132 #include <smmintrin.h>
133
134 static inline void
135 2 volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
136 const lv_16sc_t* complexVector,
137 const float scalar,
138 unsigned int num_points)
139 {
140 2 float* iBufferPtr = iBuffer;
141
142 2 unsigned int number = 0;
143 2 const unsigned int quarterPoints = num_points / 4;
144
145 __m128 iFloatValue;
146
147 2 const float iScalar = 1.0 / scalar;
148 2 __m128 invScalar = _mm_set_ps1(iScalar);
149 __m128i complexVal, iIntVal;
150 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
151
152 2 __m128i moveMask = _mm_set_epi8(
153 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
154
155
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
156 65534 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
157 65534 complexVectorPtr += 16;
158 65534 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
159
160 65534 iIntVal = _mm_cvtepi16_epi32(complexVal);
161 65534 iFloatValue = _mm_cvtepi32_ps(iIntVal);
162
163 65534 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
164
165 _mm_store_ps(iBufferPtr, iFloatValue);
166
167 65534 iBufferPtr += 4;
168 }
169
170 2 number = quarterPoints * 4;
171 2 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
172
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
173 6 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
174 6 sixteenTComplexVectorPtr++;
175 }
176 2 }
177 #endif /* LV_HAVE_SSE4_1 */
178
179 #ifdef LV_HAVE_SSE
180 #include <xmmintrin.h>
181
182 static inline void
183 2 volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
184 const lv_16sc_t* complexVector,
185 const float scalar,
186 unsigned int num_points)
187 {
188 2 float* iBufferPtr = iBuffer;
189
190 2 unsigned int number = 0;
191 2 const unsigned int quarterPoints = num_points / 4;
192 __m128 iValue;
193
194 2 const float iScalar = 1.0 / scalar;
195 2 __m128 invScalar = _mm_set_ps1(iScalar);
196 2 int16_t* complexVectorPtr = (int16_t*)complexVector;
197
198 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
199
200
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
201 65534 floatBuffer[0] = (float)(*complexVectorPtr);
202 65534 complexVectorPtr += 2;
203 65534 floatBuffer[1] = (float)(*complexVectorPtr);
204 65534 complexVectorPtr += 2;
205 65534 floatBuffer[2] = (float)(*complexVectorPtr);
206 65534 complexVectorPtr += 2;
207 65534 floatBuffer[3] = (float)(*complexVectorPtr);
208 65534 complexVectorPtr += 2;
209
210 65534 iValue = _mm_load_ps(floatBuffer);
211
212 65534 iValue = _mm_mul_ps(iValue, invScalar);
213
214 _mm_store_ps(iBufferPtr, iValue);
215
216 65534 iBufferPtr += 4;
217 }
218
219 2 number = quarterPoints * 4;
220 2 complexVectorPtr = (int16_t*)&complexVector[number];
221
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
222 6 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
223 6 complexVectorPtr++;
224 }
225 2 }
226 #endif /* LV_HAVE_SSE */
227
228 #ifdef LV_HAVE_GENERIC
229 static inline void
230 2 volk_16ic_s32f_deinterleave_real_32f_generic(float* iBuffer,
231 const lv_16sc_t* complexVector,
232 const float scalar,
233 unsigned int num_points)
234 {
235 2 unsigned int number = 0;
236 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
237 2 float* iBufferPtr = iBuffer;
238 2 const float invScalar = 1.0 / scalar;
239
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
240 262142 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
241 262142 complexVectorPtr++;
242 }
243 2 }
244 #endif /* LV_HAVE_GENERIC */
245
246
247 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
248
249 #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
250 #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
251
252 #include <inttypes.h>
253 #include <stdio.h>
254 #include <volk/volk_common.h>
255
256 #ifdef LV_HAVE_AVX2
257 #include <immintrin.h>
258
259 static inline void
260 2 volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
261 const lv_16sc_t* complexVector,
262 const float scalar,
263 unsigned int num_points)
264 {
265 2 float* iBufferPtr = iBuffer;
266
267 2 unsigned int number = 0;
268 2 const unsigned int eighthPoints = num_points / 8;
269
270 __m256 iFloatValue;
271
272 2 const float iScalar = 1.0 / scalar;
273 2 __m256 invScalar = _mm256_set1_ps(iScalar);
274 __m256i complexVal, iIntVal;
275 __m128i complexVal128;
276 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
277
278 2 __m256i moveMask = _mm256_set_epi8(0x80,
279 0x80,
280 0x80,
281 0x80,
282 0x80,
283 0x80,
284 0x80,
285 0x80,
286 13,
287 12,
288 9,
289 8,
290 5,
291 4,
292 1,
293 0,
294 0x80,
295 0x80,
296 0x80,
297 0x80,
298 0x80,
299 0x80,
300 0x80,
301 0x80,
302 13,
303 12,
304 9,
305 8,
306 5,
307 4,
308 1,
309 0);
310
311
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
312 32766 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
313 32766 complexVectorPtr += 32;
314 32766 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
315 32766 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
316 32766 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
317
318 32766 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
319 32766 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
320
321 32766 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
322
323 _mm256_storeu_ps(iBufferPtr, iFloatValue);
324
325 32766 iBufferPtr += 8;
326 }
327
328 2 number = eighthPoints * 8;
329 2 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
330
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
331 14 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
332 14 sixteenTComplexVectorPtr++;
333 }
334 2 }
335 #endif /* LV_HAVE_AVX2 */
336
337 #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
338