GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16ic_deinterleave_16i_x2.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 118 118 100.0%
Functions: 6 6 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16ic_deinterleave_16i_x2
12 *
13 * \b Overview
14 *
15 * Deinterleaves a complex 16-bit vector into separate I and Q vectors.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_16ic_deinterleave_16i_x2(int16_t* iBuffer, int16_t* qBuffer,
20 * const lv_16sc_t* complexVector, unsigned int num_points) \endcode
21 *
22 * \b Inputs
23 * \li complexVector: The complex input vector.
24 * \li num_points: The number of complex data values to be deinterleaved.
25 *
26 * \b Outputs
27 * \li iBuffer: The I buffer output data.
28 * \li qBuffer: The Q buffer output data.
29 *
30 * \b Example
31 * \code
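32 * unsigned int N = 10000;
33 * // in: N lv_16sc_t samples; iOut, qOut: N int16_t each (e.g. from volk_malloc)
34 * volk_16ic_deinterleave_16i_x2(iOut, qOut, in, N);
35 *
36 * volk_free(in);
37 * volk_free(iOut); volk_free(qOut);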
38 * \endcode
39 */
40
41 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
42 #define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
43
44 #include <inttypes.h>
45 #include <stdio.h>
46 #ifdef LV_HAVE_AVX2
47 #include <immintrin.h>
48
49 2 static inline void volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
50 int16_t* qBuffer,
51 const lv_16sc_t* complexVector,
52 unsigned int num_points)
53 {
54 2 unsigned int number = 0;
55 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 2 int16_t* iBufferPtr = iBuffer;
57 2 int16_t* qBufferPtr = qBuffer;
58
59 2 __m256i MoveMask = _mm256_set_epi8(15,
60 14,
61 11,
62 10,
63 7,
64 6,
65 3,
66 2,
67 13,
68 12,
69 9,
70 8,
71 5,
72 4,
73 1,
74 0,
75 15,
76 14,
77 11,
78 10,
79 7,
80 6,
81 3,
82 2,
83 13,
84 12,
85 9,
86 8,
87 5,
88 4,
89 1,
90 0);
91
92 __m256i iMove2, iMove1;
93 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
94
95 2 unsigned int sixteenthPoints = num_points / 16;
96
97
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
98 16382 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
99 16382 complexVectorPtr += 32;
100 16382 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
101 16382 complexVectorPtr += 32;
102
103 16382 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
104 16382 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
105
106 16382 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
107 _mm256_permute4x64_epi64(iMove2, 0x80),
108 0x30);
109 16382 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
110 _mm256_permute4x64_epi64(iMove2, 0xd0),
111 0x30);
112
113 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
114 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
115
116 16382 iBufferPtr += 16;
117 16382 qBufferPtr += 16;
118 }
119
120 2 number = sixteenthPoints * 16;
121 2 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
122
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
123 30 *iBufferPtr++ = *int16ComplexVectorPtr++;
124 30 *qBufferPtr++ = *int16ComplexVectorPtr++;
125 }
126 2 }
127 #endif /* LV_HAVE_AVX2 */
128
129 #ifdef LV_HAVE_SSSE3
130 #include <tmmintrin.h>
131
132 2 static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer,
133 int16_t* qBuffer,
134 const lv_16sc_t* complexVector,
135 unsigned int num_points)
136 {
137 2 unsigned int number = 0;
138 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 2 int16_t* iBufferPtr = iBuffer;
140 2 int16_t* qBufferPtr = qBuffer;
141
142 2 __m128i iMoveMask1 = _mm_set_epi8(
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
144 2 __m128i iMoveMask2 = _mm_set_epi8(
145 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
146
147 2 __m128i qMoveMask1 = _mm_set_epi8(
148 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
149 2 __m128i qMoveMask2 = _mm_set_epi8(
150 15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
151
152 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
153
154 2 unsigned int eighthPoints = num_points / 8;
155
156
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < eighthPoints; number++) {
157 32766 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
158 32766 complexVectorPtr += 16;
159 32766 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
160 32766 complexVectorPtr += 16;
161
162 98298 iOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, iMoveMask1),
163 _mm_shuffle_epi8(complexVal2, iMoveMask2));
164 98298 qOutputVal = _mm_or_si128(_mm_shuffle_epi8(complexVal1, qMoveMask1),
165 _mm_shuffle_epi8(complexVal2, qMoveMask2));
166
167 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
168 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
169
170 32766 iBufferPtr += 8;
171 32766 qBufferPtr += 8;
172 }
173
174 2 number = eighthPoints * 8;
175 2 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
176
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
177 14 *iBufferPtr++ = *int16ComplexVectorPtr++;
178 14 *qBufferPtr++ = *int16ComplexVectorPtr++;
179 }
180 2 }
181 #endif /* LV_HAVE_SSSE3 */
182
183 #ifdef LV_HAVE_SSE2
184 #include <emmintrin.h>
185
186 2 static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer,
187 int16_t* qBuffer,
188 const lv_16sc_t* complexVector,
189 unsigned int num_points)
190 {
191 2 unsigned int number = 0;
192 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
193 2 int16_t* iBufferPtr = iBuffer;
194 2 int16_t* qBufferPtr = qBuffer;
195 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
196 qComplexVal2, iOutputVal, qOutputVal;
197 2 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
198 2 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
199
200 2 unsigned int eighthPoints = num_points / 8;
201
202
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < eighthPoints; number++) {
203 32766 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
204 32766 complexVectorPtr += 8;
205 32766 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
206 32766 complexVectorPtr += 8;
207
208 32766 iComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
209
210 32766 iComplexVal1 = _mm_shufflehi_epi16(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
211
212 32766 iComplexVal1 = _mm_shuffle_epi32(iComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
213
214 32766 iComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
215
216 32766 iComplexVal2 = _mm_shufflehi_epi16(iComplexVal2, _MM_SHUFFLE(3, 1, 2, 0));
217
218 32766 iComplexVal2 = _mm_shuffle_epi32(iComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
219
220 98298 iOutputVal = _mm_or_si128(_mm_and_si128(iComplexVal1, lowMask),
221 _mm_and_si128(iComplexVal2, highMask));
222
223 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
224
225 32766 qComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(2, 0, 3, 1));
226
227 32766 qComplexVal1 = _mm_shufflehi_epi16(qComplexVal1, _MM_SHUFFLE(2, 0, 3, 1));
228
229 32766 qComplexVal1 = _mm_shuffle_epi32(qComplexVal1, _MM_SHUFFLE(3, 1, 2, 0));
230
231 32766 qComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
232
233 32766 qComplexVal2 = _mm_shufflehi_epi16(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
234
235 32766 qComplexVal2 = _mm_shuffle_epi32(qComplexVal2, _MM_SHUFFLE(2, 0, 3, 1));
236
237 98298 qOutputVal = _mm_or_si128(_mm_and_si128(qComplexVal1, lowMask),
238 _mm_and_si128(qComplexVal2, highMask));
239
240 _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
241
242 32766 iBufferPtr += 8;
243 32766 qBufferPtr += 8;
244 }
245
246 2 number = eighthPoints * 8;
247
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
248 14 *iBufferPtr++ = *complexVectorPtr++;
249 14 *qBufferPtr++ = *complexVectorPtr++;
250 }
251 2 }
252 #endif /* LV_HAVE_SSE2 */
253
254 #ifdef LV_HAVE_GENERIC
255
256 2 static inline void volk_16ic_deinterleave_16i_x2_generic(int16_t* iBuffer,
257 int16_t* qBuffer,
258 const lv_16sc_t* complexVector,
259 unsigned int num_points)
260 {
261 2 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
262 2 int16_t* iBufferPtr = iBuffer;
263 2 int16_t* qBufferPtr = qBuffer;
264 unsigned int number;
265
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
266 262142 *iBufferPtr++ = *complexVectorPtr++;
267 262142 *qBufferPtr++ = *complexVectorPtr++;
268 }
269 2 }
270 #endif /* LV_HAVE_GENERIC */
271
272 #ifdef LV_HAVE_ORC
273
274 extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer,
275 int16_t* qBuffer,
276 const lv_16sc_t* complexVector,
277 unsigned int num_points);
278 2 static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer,
279 int16_t* qBuffer,
280 const lv_16sc_t* complexVector,
281 unsigned int num_points)
282 {
283 2 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
284 2 }
285 #endif /* LV_HAVE_ORC */
286
287 #endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a_H */
288
289
290 #ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
291 #define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
292
293 #include <inttypes.h>
294 #include <stdio.h>
295 #ifdef LV_HAVE_AVX2
296 #include <immintrin.h>
297
298 2 static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
299 int16_t* qBuffer,
300 const lv_16sc_t* complexVector,
301 unsigned int num_points)
302 {
303 2 unsigned int number = 0;
304 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
305 2 int16_t* iBufferPtr = iBuffer;
306 2 int16_t* qBufferPtr = qBuffer;
307
308 2 __m256i MoveMask = _mm256_set_epi8(15,
309 14,
310 11,
311 10,
312 7,
313 6,
314 3,
315 2,
316 13,
317 12,
318 9,
319 8,
320 5,
321 4,
322 1,
323 0,
324 15,
325 14,
326 11,
327 10,
328 7,
329 6,
330 3,
331 2,
332 13,
333 12,
334 9,
335 8,
336 5,
337 4,
338 1,
339 0);
340
341 __m256i iMove2, iMove1;
342 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
343
344 2 unsigned int sixteenthPoints = num_points / 16;
345
346
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
347 16382 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
348 16382 complexVectorPtr += 32;
349 16382 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
350 16382 complexVectorPtr += 32;
351
352 16382 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
353 16382 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
354
355 16382 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
356 _mm256_permute4x64_epi64(iMove2, 0x80),
357 0x30);
358 16382 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
359 _mm256_permute4x64_epi64(iMove2, 0xd0),
360 0x30);
361
362 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
363 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
364
365 16382 iBufferPtr += 16;
366 16382 qBufferPtr += 16;
367 }
368
369 2 number = sixteenthPoints * 16;
370 2 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
371
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
372 30 *iBufferPtr++ = *int16ComplexVectorPtr++;
373 30 *qBufferPtr++ = *int16ComplexVectorPtr++;
374 }
375 2 }
376 #endif /* LV_HAVE_AVX2 */
377
378 #endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_u_H */
379