GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 112 112 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_8ic_s32f_deinterleave_real_32f
12 *
13 * \b Overview
14 *
15 * Extracts the real (I) component of each sample in the complex 8-bit
16 * input vector, converts it to float, and divides it by the scalar
17 * factor. The imaginary (Q) components are discarded.
18 *
19 * <b>Dispatcher Prototype</b>
20 * \code
21 * void volk_8ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_8sc_t* complexVector,
22 * const float scalar, unsigned int num_points) \endcode
23 *
24 * \b Inputs
25 * \li complexVector: The complex input vector.
26 * \li scalar: The scalar value used to divide the floating point results.
27 * \li num_points: The number of complex data values to be deinterleaved.
28 *
29 * \b Outputs
30 * \li iBuffer: The I buffer output data.
31 *
32 * \b Example
33 * \code
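34 * unsigned int N = 10000;
35 * // in: N lv_8sc_t inputs, out: N floats (aligned buffers); scalar: the divisor
36 * volk_8ic_s32f_deinterleave_real_32f(out, in, scalar, N);
37 *
38 * volk_free(in); volk_free(out);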
39 * \endcode
40 */
41
42 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
43 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
44
45 #include <inttypes.h>
46 #include <stdio.h>
47 #include <volk/volk_common.h>
48
49 #ifdef LV_HAVE_AVX2
50 #include <immintrin.h>
51
52 static inline void
53 2 volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
54 const lv_8sc_t* complexVector,
55 const float scalar,
56 unsigned int num_points)
57 {
58 2 float* iBufferPtr = iBuffer;
59
60 2 unsigned int number = 0;
61 2 const unsigned int sixteenthPoints = num_points / 16;
62 __m256 iFloatValue;
63
64 2 const float iScalar = 1.0 / scalar;
65 2 __m256 invScalar = _mm256_set1_ps(iScalar);
66 __m256i complexVal, iIntVal;
67 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
68
69 2 __m256i moveMask = _mm256_set_epi8(0x80,
70 0x80,
71 0x80,
72 0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 14,
78 12,
79 10,
80 8,
81 6,
82 4,
83 2,
84 0,
85 0x80,
86 0x80,
87 0x80,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 14,
94 12,
95 10,
96 8,
97 6,
98 4,
99 2,
100 0);
101
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
102 16382 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
103 16382 complexVectorPtr += 32;
104 16382 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
105
106 32764 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
107 16382 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
108 16382 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
109 _mm256_store_ps(iBufferPtr, iFloatValue);
110 16382 iBufferPtr += 8;
111
112 16382 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
113 32764 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
114 16382 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115 16382 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116 _mm256_store_ps(iBufferPtr, iFloatValue);
117 16382 iBufferPtr += 8;
118 }
119
120 2 number = sixteenthPoints * 16;
121
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
122 30 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
123 30 complexVectorPtr++;
124 }
125 2 }
126 #endif /* LV_HAVE_AVX2 */
127
128
129 #ifdef LV_HAVE_SSE4_1
130 #include <smmintrin.h>
131
132 static inline void
133 2 volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
134 const lv_8sc_t* complexVector,
135 const float scalar,
136 unsigned int num_points)
137 {
138 2 float* iBufferPtr = iBuffer;
139
140 2 unsigned int number = 0;
141 2 const unsigned int eighthPoints = num_points / 8;
142 __m128 iFloatValue;
143
144 2 const float iScalar = 1.0 / scalar;
145 2 __m128 invScalar = _mm_set_ps1(iScalar);
146 __m128i complexVal, iIntVal;
147 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
148
149 2 __m128i moveMask = _mm_set_epi8(
150 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
151
152
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
153 32766 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
154 32766 complexVectorPtr += 16;
155 32766 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
156
157 32766 iIntVal = _mm_cvtepi8_epi32(complexVal);
158 32766 iFloatValue = _mm_cvtepi32_ps(iIntVal);
159
160 32766 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
161
162 _mm_store_ps(iBufferPtr, iFloatValue);
163
164 32766 iBufferPtr += 4;
165
166 32766 complexVal = _mm_srli_si128(complexVal, 4);
167 32766 iIntVal = _mm_cvtepi8_epi32(complexVal);
168 32766 iFloatValue = _mm_cvtepi32_ps(iIntVal);
169
170 32766 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
171
172 _mm_store_ps(iBufferPtr, iFloatValue);
173
174 32766 iBufferPtr += 4;
175 }
176
177 2 number = eighthPoints * 8;
178
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
179 14 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
180 14 complexVectorPtr++;
181 }
182 2 }
183 #endif /* LV_HAVE_SSE4_1 */
184
185
186 #ifdef LV_HAVE_SSE
187 #include <xmmintrin.h>
188
189 static inline void
190 2 volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
191 const lv_8sc_t* complexVector,
192 const float scalar,
193 unsigned int num_points)
194 {
195 2 float* iBufferPtr = iBuffer;
196
197 2 unsigned int number = 0;
198 2 const unsigned int quarterPoints = num_points / 4;
199 __m128 iValue;
200
201 2 const float iScalar = 1.0 / scalar;
202 2 __m128 invScalar = _mm_set_ps1(iScalar);
203 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
204
205 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
206
207
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
208 65534 floatBuffer[0] = (float)(*complexVectorPtr);
209 65534 complexVectorPtr += 2;
210 65534 floatBuffer[1] = (float)(*complexVectorPtr);
211 65534 complexVectorPtr += 2;
212 65534 floatBuffer[2] = (float)(*complexVectorPtr);
213 65534 complexVectorPtr += 2;
214 65534 floatBuffer[3] = (float)(*complexVectorPtr);
215 65534 complexVectorPtr += 2;
216
217 65534 iValue = _mm_load_ps(floatBuffer);
218
219 65534 iValue = _mm_mul_ps(iValue, invScalar);
220
221 _mm_store_ps(iBufferPtr, iValue);
222
223 65534 iBufferPtr += 4;
224 }
225
226 2 number = quarterPoints * 4;
227
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
228 6 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
229 6 complexVectorPtr++;
230 }
231 2 }
232 #endif /* LV_HAVE_SSE */
233
234
235 #ifdef LV_HAVE_GENERIC
236
237 static inline void
238 2 volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer,
239 const lv_8sc_t* complexVector,
240 const float scalar,
241 unsigned int num_points)
242 {
243 2 unsigned int number = 0;
244 2 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
245 2 float* iBufferPtr = iBuffer;
246 2 const float invScalar = 1.0 / scalar;
247
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
248 262142 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
249 262142 complexVectorPtr++;
250 }
251 2 }
252 #endif /* LV_HAVE_GENERIC */
253
254
255 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
256
257 #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
258 #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
259
260 #include <inttypes.h>
261 #include <stdio.h>
262 #include <volk/volk_common.h>
263
264 #ifdef LV_HAVE_AVX2
265 #include <immintrin.h>
266
267 static inline void
268 2 volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
269 const lv_8sc_t* complexVector,
270 const float scalar,
271 unsigned int num_points)
272 {
273 2 float* iBufferPtr = iBuffer;
274
275 2 unsigned int number = 0;
276 2 const unsigned int sixteenthPoints = num_points / 16;
277 __m256 iFloatValue;
278
279 2 const float iScalar = 1.0 / scalar;
280 2 __m256 invScalar = _mm256_set1_ps(iScalar);
281 __m256i complexVal, iIntVal;
282 __m128i hcomplexVal;
283 2 int8_t* complexVectorPtr = (int8_t*)complexVector;
284
285 2 __m256i moveMask = _mm256_set_epi8(0x80,
286 0x80,
287 0x80,
288 0x80,
289 0x80,
290 0x80,
291 0x80,
292 0x80,
293 14,
294 12,
295 10,
296 8,
297 6,
298 4,
299 2,
300 0,
301 0x80,
302 0x80,
303 0x80,
304 0x80,
305 0x80,
306 0x80,
307 0x80,
308 0x80,
309 14,
310 12,
311 10,
312 8,
313 6,
314 4,
315 2,
316 0);
317
318
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (; number < sixteenthPoints; number++) {
319 16382 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
320 16382 complexVectorPtr += 32;
321 16382 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
322
323 16382 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
324 16382 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
325 16382 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
326
327 16382 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
328
329 _mm256_storeu_ps(iBufferPtr, iFloatValue);
330
331 16382 iBufferPtr += 8;
332
333 16382 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
334 16382 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
335 16382 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
336
337 16382 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
338
339 _mm256_storeu_ps(iBufferPtr, iFloatValue);
340
341 16382 iBufferPtr += 8;
342 }
343
344 2 number = sixteenthPoints * 16;
345
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
346 30 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
347 30 complexVectorPtr++;
348 }
349 2 }
350 #endif /* LV_HAVE_AVX2 */
351
352
353 #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
354