GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_16ic_deinterleave_real_16i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 98 98 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_16ic_deinterleave_real_16i
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex 16 bit vector and returns the real (inphase) part of the
16 * signal.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_16ic_deinterleave_real_16i(int16_t* iBuffer, const lv_16sc_t* complexVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 *
30 * \b Example
31 * \code
32 * int N = 10000;
33 *
34 * volk_16ic_deinterleave_real_16i();
35 *
36 * volk_free(x);
37 * volk_free(t);
38 * \endcode
39 */
40
41 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
42 #define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
43
44 #include <inttypes.h>
45 #include <stdio.h>
46
47
48 #ifdef LV_HAVE_AVX2
49 #include <immintrin.h>
50
51 2 static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
52 const lv_16sc_t* complexVector,
53 unsigned int num_points)
54 {
55 2 unsigned int number = 0;
56 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
57 2 int16_t* iBufferPtr = iBuffer;
58
59 2 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
60 0x80,
61 0x80,
62 0x80,
63 0x80,
64 0x80,
65 0x80,
66 0x80,
67 13,
68 12,
69 9,
70 8,
71 5,
72 4,
73 1,
74 0,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 0x80,
81 0x80,
82 0x80,
83 13,
84 12,
85 9,
86 8,
87 5,
88 4,
89 1,
90 0);
91 2 __m256i iMoveMask2 = _mm256_set_epi8(13,
92 12,
93 9,
94 8,
95 5,
96 4,
97 1,
98 0,
99 0x80,
100 0x80,
101 0x80,
102 0x80,
103 0x80,
104 0x80,
105 0x80,
106 0x80,
107 13,
108 12,
109 9,
110 8,
111 5,
112 4,
113 1,
114 0,
115 0x80,
116 0x80,
117 0x80,
118 0x80,
119 0x80,
120 0x80,
121 0x80,
122 0x80);
123
124 __m256i complexVal1, complexVal2, iOutputVal;
125
126 2 unsigned int sixteenthPoints = num_points / 16;
127
128
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
129 16382 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
130 16382 complexVectorPtr += 16;
131 16382 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
132 16382 complexVectorPtr += 16;
133
134 16382 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
135 16382 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
136
137 16382 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
138 16382 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
139
140 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
141
142 16382 iBufferPtr += 16;
143 }
144
145 2 number = sixteenthPoints * 16;
146
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
147 30 *iBufferPtr++ = *complexVectorPtr++;
148 30 complexVectorPtr++;
149 }
150 2 }
151 #endif /* LV_HAVE_AVX2 */
152
153 #ifdef LV_HAVE_SSSE3
154 #include <tmmintrin.h>
155
156 2 static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer,
157 const lv_16sc_t* complexVector,
158 unsigned int num_points)
159 {
160 2 unsigned int number = 0;
161 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
162 2 int16_t* iBufferPtr = iBuffer;
163
164 2 __m128i iMoveMask1 = _mm_set_epi8(
165 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
166 2 __m128i iMoveMask2 = _mm_set_epi8(
167 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
168
169 __m128i complexVal1, complexVal2, iOutputVal;
170
171 2 unsigned int eighthPoints = num_points / 8;
172
173
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < eighthPoints; number++) {
174 32766 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
175 32766 complexVectorPtr += 8;
176 32766 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
177 32766 complexVectorPtr += 8;
178
179 32766 complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
180 32766 complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
181
182 32766 iOutputVal = _mm_or_si128(complexVal1, complexVal2);
183
184 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
185
186 32766 iBufferPtr += 8;
187 }
188
189 2 number = eighthPoints * 8;
190
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
191 14 *iBufferPtr++ = *complexVectorPtr++;
192 14 complexVectorPtr++;
193 }
194 2 }
195 #endif /* LV_HAVE_SSSE3 */
196
197
198 #ifdef LV_HAVE_SSE2
199 #include <emmintrin.h>
200
201 2 static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer,
202 const lv_16sc_t* complexVector,
203 unsigned int num_points)
204 {
205 2 unsigned int number = 0;
206 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
207 2 int16_t* iBufferPtr = iBuffer;
208 __m128i complexVal1, complexVal2, iOutputVal;
209 2 __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
210 2 __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
211
212 2 unsigned int eighthPoints = num_points / 8;
213
214
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < eighthPoints; number++) {
215 32766 complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);
216 32766 complexVectorPtr += 8;
217 32766 complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);
218 32766 complexVectorPtr += 8;
219
220 32766 complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
221
222 32766 complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
223
224 32766 complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3, 1, 2, 0));
225
226 32766 complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
227
228 32766 complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0));
229
230 32766 complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2, 0, 3, 1));
231
232 98298 iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask),
233 _mm_and_si128(complexVal2, highMask));
234
235 _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
236
237 32766 iBufferPtr += 8;
238 }
239
240 2 number = eighthPoints * 8;
241
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
242 14 *iBufferPtr++ = *complexVectorPtr++;
243 14 complexVectorPtr++;
244 }
245 2 }
246 #endif /* LV_HAVE_SSE2 */
247
248 #ifdef LV_HAVE_GENERIC
249
250 2 static inline void volk_16ic_deinterleave_real_16i_generic(int16_t* iBuffer,
251 const lv_16sc_t* complexVector,
252 unsigned int num_points)
253 {
254 2 unsigned int number = 0;
255 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
256 2 int16_t* iBufferPtr = iBuffer;
257
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
258 262142 *iBufferPtr++ = *complexVectorPtr++;
259 262142 complexVectorPtr++;
260 }
261 2 }
262 #endif /* LV_HAVE_GENERIC */
263
264
265 #endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */
266
267
268 #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H
269 #define INCLUDED_volk_16ic_deinterleave_real_16i_u_H
270
271 #include <inttypes.h>
272 #include <stdio.h>
273
274
275 #ifdef LV_HAVE_AVX2
276 #include <immintrin.h>
277
278 2 static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
279 const lv_16sc_t* complexVector,
280 unsigned int num_points)
281 {
282 2 unsigned int number = 0;
283 2 const int16_t* complexVectorPtr = (int16_t*)complexVector;
284 2 int16_t* iBufferPtr = iBuffer;
285
286 2 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
287 0x80,
288 0x80,
289 0x80,
290 0x80,
291 0x80,
292 0x80,
293 0x80,
294 13,
295 12,
296 9,
297 8,
298 5,
299 4,
300 1,
301 0,
302 0x80,
303 0x80,
304 0x80,
305 0x80,
306 0x80,
307 0x80,
308 0x80,
309 0x80,
310 13,
311 12,
312 9,
313 8,
314 5,
315 4,
316 1,
317 0);
318 2 __m256i iMoveMask2 = _mm256_set_epi8(13,
319 12,
320 9,
321 8,
322 5,
323 4,
324 1,
325 0,
326 0x80,
327 0x80,
328 0x80,
329 0x80,
330 0x80,
331 0x80,
332 0x80,
333 0x80,
334 13,
335 12,
336 9,
337 8,
338 5,
339 4,
340 1,
341 0,
342 0x80,
343 0x80,
344 0x80,
345 0x80,
346 0x80,
347 0x80,
348 0x80,
349 0x80);
350
351 __m256i complexVal1, complexVal2, iOutputVal;
352
353 2 unsigned int sixteenthPoints = num_points / 16;
354
355
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
356 16382 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
357 16382 complexVectorPtr += 16;
358 16382 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
359 16382 complexVectorPtr += 16;
360
361 16382 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
362 16382 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
363
364 16382 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
365 16382 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
366
367 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
368
369 16382 iBufferPtr += 16;
370 }
371
372 2 number = sixteenthPoints * 16;
373
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
374 30 *iBufferPtr++ = *complexVectorPtr++;
375 30 complexVectorPtr++;
376 }
377 2 }
378 #endif /* LV_HAVE_AVX2 */
379
380 #endif /* INCLUDED_volk_16ic_deinterleave_real_16i_u_H */
381