GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8ic_deinterleave_real_8i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 104 104 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_8ic_deinterleave_real_8i
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex 8-bit char vector into just the I (real)
16 * vector.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_8ic_deinterleave_real_8i(int8_t* iBuffer, const lv_8sc_t* complexVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 *
30 * \b Example
31 * \code
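32 * int N = 10000;
33 * lv_8sc_t* in = (lv_8sc_t*)volk_malloc(sizeof(lv_8sc_t) * N, volk_get_alignment());
34 * int8_t* out = (int8_t*)volk_malloc(sizeof(int8_t) * N, volk_get_alignment());
35 * volk_8ic_deinterleave_real_8i(out, in, N); /* fill 'in' with samples before this call */
36 * volk_free(in); volk_free(out);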
37 * \endcode
38 */
39
40 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
41 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
42
43 #include <inttypes.h>
44 #include <stdio.h>
45
46 #ifdef LV_HAVE_AVX2
47 #include <immintrin.h>
48
49 2 static inline void volk_8ic_deinterleave_real_8i_a_avx2(int8_t* iBuffer,
50 const lv_8sc_t* complexVector,
51 unsigned int num_points)
52 {
53 2 unsigned int number = 0;
54 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
55 2 int8_t* iBufferPtr = iBuffer;
56 2 __m256i moveMask1 = _mm256_set_epi8(0x80,
57 0x80,
58 0x80,
59 0x80,
60 0x80,
61 0x80,
62 0x80,
63 0x80,
64 14,
65 12,
66 10,
67 8,
68 6,
69 4,
70 2,
71 0,
72 0x80,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 14,
81 12,
82 10,
83 8,
84 6,
85 4,
86 2,
87 0);
88 2 __m256i moveMask2 = _mm256_set_epi8(14,
89 12,
90 10,
91 8,
92 6,
93 4,
94 2,
95 0,
96 0x80,
97 0x80,
98 0x80,
99 0x80,
100 0x80,
101 0x80,
102 0x80,
103 0x80,
104 14,
105 12,
106 10,
107 8,
108 6,
109 4,
110 2,
111 0,
112 0x80,
113 0x80,
114 0x80,
115 0x80,
116 0x80,
117 0x80,
118 0x80,
119 0x80);
120 __m256i complexVal1, complexVal2, outputVal;
121
122 2 unsigned int thirtysecondPoints = num_points / 32;
123
124
2/2
✓ Branch 0 taken 8190 times.
✓ Branch 1 taken 2 times.
8192 for (number = 0; number < thirtysecondPoints; number++) {
125
126 8190 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
127 8190 complexVectorPtr += 32;
128 8190 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
129 8190 complexVectorPtr += 32;
130
131 8190 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
132 8190 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
133 8190 outputVal = _mm256_or_si256(complexVal1, complexVal2);
134 8190 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
135
136 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
137 8190 iBufferPtr += 32;
138 }
139
140 2 number = thirtysecondPoints * 32;
141
2/2
✓ Branch 0 taken 62 times.
✓ Branch 1 taken 2 times.
64 for (; number < num_points; number++) {
142 62 *iBufferPtr++ = *complexVectorPtr++;
143 62 complexVectorPtr++;
144 }
145 2 }
146 #endif /* LV_HAVE_AVX2 */
147
148
149 #ifdef LV_HAVE_SSSE3
150 #include <tmmintrin.h>
151
152 2 static inline void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer,
153 const lv_8sc_t* complexVector,
154 unsigned int num_points)
155 {
156 2 unsigned int number = 0;
157 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
158 2 int8_t* iBufferPtr = iBuffer;
159 2 __m128i moveMask1 = _mm_set_epi8(
160 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
161 2 __m128i moveMask2 = _mm_set_epi8(
162 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
163 __m128i complexVal1, complexVal2, outputVal;
164
165 2 unsigned int sixteenthPoints = num_points / 16;
166
167
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
168 16382 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
169 16382 complexVectorPtr += 16;
170 16382 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
171 16382 complexVectorPtr += 16;
172
173 16382 complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
174 16382 complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
175
176 16382 outputVal = _mm_or_si128(complexVal1, complexVal2);
177
178 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
179 16382 iBufferPtr += 16;
180 }
181
182 2 number = sixteenthPoints * 16;
183
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
184 30 *iBufferPtr++ = *complexVectorPtr++;
185 30 complexVectorPtr++;
186 }
187 2 }
188 #endif /* LV_HAVE_SSSE3 */
189
190
191 #ifdef LV_HAVE_AVX
192 #include <immintrin.h>
193
194 2 static inline void volk_8ic_deinterleave_real_8i_a_avx(int8_t* iBuffer,
195 const lv_8sc_t* complexVector,
196 unsigned int num_points)
197 {
198 2 unsigned int number = 0;
199 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
200 2 int8_t* iBufferPtr = iBuffer;
201 2 __m128i moveMaskL = _mm_set_epi8(
202 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
203 2 __m128i moveMaskH = _mm_set_epi8(
204 14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
205 __m256i complexVal1, complexVal2, outputVal;
206 __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1,
207 outputVal2;
208
209 2 unsigned int thirtysecondPoints = num_points / 32;
210
211
2/2
✓ Branch 0 taken 8190 times.
✓ Branch 1 taken 2 times.
8192 for (number = 0; number < thirtysecondPoints; number++) {
212
213 8190 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
214 8190 complexVectorPtr += 32;
215 8190 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
216 8190 complexVectorPtr += 32;
217
218 8190 complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
219 8190 complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
220 8190 complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
221 8190 complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
222
223 8190 complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
224 8190 complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
225 8190 outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
226
227
228 8190 complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
229 8190 complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
230 8190 outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
231
232 8190 __m256i dummy = _mm256_setzero_si256();
233 8190 outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
234 8190 outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
235
236
237 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
238 8190 iBufferPtr += 32;
239 }
240
241 2 number = thirtysecondPoints * 32;
242
2/2
✓ Branch 0 taken 62 times.
✓ Branch 1 taken 2 times.
64 for (; number < num_points; number++) {
243 62 *iBufferPtr++ = *complexVectorPtr++;
244 62 complexVectorPtr++;
245 }
246 2 }
247 #endif /* LV_HAVE_AVX */
248
249
250 #ifdef LV_HAVE_GENERIC
251
252 2 static inline void volk_8ic_deinterleave_real_8i_generic(int8_t* iBuffer,
253 const lv_8sc_t* complexVector,
254 unsigned int num_points)
255 {
256 2 unsigned int number = 0;
257 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
258 2 int8_t* iBufferPtr = iBuffer;
259
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
260 262142 *iBufferPtr++ = *complexVectorPtr++;
261 262142 complexVectorPtr++;
262 }
263 2 }
264 #endif /* LV_HAVE_GENERIC */
265
266
267 #ifdef LV_HAVE_NEON
268 #include <arm_neon.h>
269
270 static inline void volk_8ic_deinterleave_real_8i_neon(int8_t* iBuffer,
271 const lv_8sc_t* complexVector,
272 unsigned int num_points)
273 {
274 unsigned int number;
275 unsigned int sixteenth_points = num_points / 16;
276
277 int8x16x2_t input_vector;
278 for (number = 0; number < sixteenth_points; ++number) {
279 input_vector = vld2q_s8((int8_t*)complexVector);
280 vst1q_s8(iBuffer, input_vector.val[0]);
281 iBuffer += 16;
282 complexVector += 16;
283 }
284
285 const int8_t* complexVectorPtr = (int8_t*)complexVector;
286 int8_t* iBufferPtr = iBuffer;
287 for (number = sixteenth_points * 16; number < num_points; number++) {
288 *iBufferPtr++ = *complexVectorPtr++;
289 complexVectorPtr++;
290 }
291 }
292 #endif /* LV_HAVE_NEON */
293
294
295 #endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H */
296
297 #ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
298 #define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H
299
300 #include <inttypes.h>
301 #include <stdio.h>
302
303 #ifdef LV_HAVE_AVX2
304 #include <immintrin.h>
305
306 2 static inline void volk_8ic_deinterleave_real_8i_u_avx2(int8_t* iBuffer,
307 const lv_8sc_t* complexVector,
308 unsigned int num_points)
309 {
310 2 unsigned int number = 0;
311 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
312 2 int8_t* iBufferPtr = iBuffer;
313 2 __m256i moveMask1 = _mm256_set_epi8(0x80,
314 0x80,
315 0x80,
316 0x80,
317 0x80,
318 0x80,
319 0x80,
320 0x80,
321 14,
322 12,
323 10,
324 8,
325 6,
326 4,
327 2,
328 0,
329 0x80,
330 0x80,
331 0x80,
332 0x80,
333 0x80,
334 0x80,
335 0x80,
336 0x80,
337 14,
338 12,
339 10,
340 8,
341 6,
342 4,
343 2,
344 0);
345 2 __m256i moveMask2 = _mm256_set_epi8(14,
346 12,
347 10,
348 8,
349 6,
350 4,
351 2,
352 0,
353 0x80,
354 0x80,
355 0x80,
356 0x80,
357 0x80,
358 0x80,
359 0x80,
360 0x80,
361 14,
362 12,
363 10,
364 8,
365 6,
366 4,
367 2,
368 0,
369 0x80,
370 0x80,
371 0x80,
372 0x80,
373 0x80,
374 0x80,
375 0x80,
376 0x80);
377 __m256i complexVal1, complexVal2, outputVal;
378
379 2 unsigned int thirtysecondPoints = num_points / 32;
380
381
2/2
✓ Branch 0 taken 8190 times.
✓ Branch 1 taken 2 times.
8192 for (number = 0; number < thirtysecondPoints; number++) {
382
383 8190 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
384 8190 complexVectorPtr += 32;
385 8190 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
386 8190 complexVectorPtr += 32;
387
388 8190 complexVal1 = _mm256_shuffle_epi8(complexVal1, moveMask1);
389 8190 complexVal2 = _mm256_shuffle_epi8(complexVal2, moveMask2);
390 8190 outputVal = _mm256_or_si256(complexVal1, complexVal2);
391 8190 outputVal = _mm256_permute4x64_epi64(outputVal, 0xd8);
392
393 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
394 8190 iBufferPtr += 32;
395 }
396
397 2 number = thirtysecondPoints * 32;
398
2/2
✓ Branch 0 taken 62 times.
✓ Branch 1 taken 2 times.
64 for (; number < num_points; number++) {
399 62 *iBufferPtr++ = *complexVectorPtr++;
400 62 complexVectorPtr++;
401 }
402 2 }
403 #endif /* LV_HAVE_AVX2 */
404
405 #endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_UNALIGNED8_H */
406