GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_8ic_deinterleave_real_16i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 92 92 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_8ic_deinterleave_real_16i
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex 8-bit char vector into just the I (real)
16 * vector and converts it to 16-bit shorts, scaling each sample by 128 (<< 7).
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_8ic_deinterleave_real_16i(int16_t* iBuffer, const lv_8sc_t* complexVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 *
30 * \b Example
31 * \code
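32 * int N = 10000;
33 * lv_8sc_t* in = (lv_8sc_t*)volk_malloc(sizeof(lv_8sc_t) * N, volk_get_alignment());
34 * int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t) * N, volk_get_alignment());
35 * volk_8ic_deinterleave_real_16i(out, in, N); // call after filling in[] with samples
36 * volk_free(in); volk_free(out);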
37 * \endcode
38 */
39
40 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
41 #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
42
43 #include <inttypes.h>
44 #include <stdio.h>
45
46
47 #ifdef LV_HAVE_AVX2
48 #include <immintrin.h>
49
50 2 static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
51 const lv_8sc_t* complexVector,
52 unsigned int num_points)
53 {
54 2 unsigned int number = 0;
55 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 2 int16_t* iBufferPtr = iBuffer;
57 2 __m256i moveMask = _mm256_set_epi8(0x80,
58 0x80,
59 0x80,
60 0x80,
61 0x80,
62 0x80,
63 0x80,
64 0x80,
65 14,
66 12,
67 10,
68 8,
69 6,
70 4,
71 2,
72 0,
73 0x80,
74 0x80,
75 0x80,
76 0x80,
77 0x80,
78 0x80,
79 0x80,
80 0x80,
81 14,
82 12,
83 10,
84 8,
85 6,
86 4,
87 2,
88 0);
89 __m256i complexVal, outputVal;
90 __m128i outputVal0;
91
92 2 unsigned int sixteenthPoints = num_points / 16;
93
94
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
95 16382 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
96 16382 complexVectorPtr += 32;
97
98 16382 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
99 16382 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
100
101 16382 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
102
103 16382 outputVal = _mm256_cvtepi8_epi16(outputVal0);
104 16382 outputVal = _mm256_slli_epi16(outputVal, 7);
105
106 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
107
108 16382 iBufferPtr += 16;
109 }
110
111 2 number = sixteenthPoints * 16;
112
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
113 30 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
114 30 complexVectorPtr++;
115 }
116 2 }
117 #endif /* LV_HAVE_AVX2 */
118
119 #ifdef LV_HAVE_SSE4_1
120 #include <smmintrin.h>
121
122 2 static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer,
123 const lv_8sc_t* complexVector,
124 unsigned int num_points)
125 {
126 2 unsigned int number = 0;
127 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
128 2 int16_t* iBufferPtr = iBuffer;
129 2 __m128i moveMask = _mm_set_epi8(
130 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
131 __m128i complexVal, outputVal;
132
133 2 unsigned int eighthPoints = num_points / 8;
134
135
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (number = 0; number < eighthPoints; number++) {
136 32766 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
137 32766 complexVectorPtr += 16;
138
139 32766 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
140
141 32766 outputVal = _mm_cvtepi8_epi16(complexVal);
142 32766 outputVal = _mm_slli_epi16(outputVal, 7);
143
144 _mm_store_si128((__m128i*)iBufferPtr, outputVal);
145 32766 iBufferPtr += 8;
146 }
147
148 2 number = eighthPoints * 8;
149
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
150 14 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
151 14 complexVectorPtr++;
152 }
153 2 }
154 #endif /* LV_HAVE_SSE4_1 */
155
156
157 #ifdef LV_HAVE_AVX
158 #include <immintrin.h>
159
160 2 static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer,
161 const lv_8sc_t* complexVector,
162 unsigned int num_points)
163 {
164 2 unsigned int number = 0;
165 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
166 2 int16_t* iBufferPtr = iBuffer;
167 2 __m128i moveMask = _mm_set_epi8(
168 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
169 __m256i complexVal, outputVal;
170 __m128i complexVal1, complexVal0, outputVal1, outputVal0;
171
172 2 unsigned int sixteenthPoints = num_points / 16;
173
174
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
175 16382 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
176 16382 complexVectorPtr += 32;
177
178 16382 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
179 16382 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
180
181 16382 outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
182 16382 outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
183
184 16382 outputVal1 = _mm_cvtepi8_epi16(outputVal1);
185 16382 outputVal1 = _mm_slli_epi16(outputVal1, 7);
186 16382 outputVal0 = _mm_cvtepi8_epi16(outputVal0);
187 16382 outputVal0 = _mm_slli_epi16(outputVal0, 7);
188
189 16382 __m256i dummy = _mm256_setzero_si256();
190 16382 outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
191 16382 outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
192 _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
193
194 16382 iBufferPtr += 16;
195 }
196
197 2 number = sixteenthPoints * 16;
198
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
199 30 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
200 30 complexVectorPtr++;
201 }
202 2 }
203 #endif /* LV_HAVE_AVX */
204
205
206 #ifdef LV_HAVE_GENERIC
207
208 2 static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer,
209 const lv_8sc_t* complexVector,
210 unsigned int num_points)
211 {
212 2 unsigned int number = 0;
213 2 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
214 2 int16_t* iBufferPtr = iBuffer;
215
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
216 262142 *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
217 262142 complexVectorPtr++;
218 }
219 2 }
220 #endif /* LV_HAVE_GENERIC */
221
222
223 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
224
225 #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H
226 #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H
227
228 #include <inttypes.h>
229 #include <stdio.h>
230
231
232 #ifdef LV_HAVE_AVX2
233 #include <immintrin.h>
234
235 2 static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
236 const lv_8sc_t* complexVector,
237 unsigned int num_points)
238 {
239 2 unsigned int number = 0;
240 2 const int8_t* complexVectorPtr = (int8_t*)complexVector;
241 2 int16_t* iBufferPtr = iBuffer;
242 2 __m256i moveMask = _mm256_set_epi8(0x80,
243 0x80,
244 0x80,
245 0x80,
246 0x80,
247 0x80,
248 0x80,
249 0x80,
250 14,
251 12,
252 10,
253 8,
254 6,
255 4,
256 2,
257 0,
258 0x80,
259 0x80,
260 0x80,
261 0x80,
262 0x80,
263 0x80,
264 0x80,
265 0x80,
266 14,
267 12,
268 10,
269 8,
270 6,
271 4,
272 2,
273 0);
274 __m256i complexVal, outputVal;
275 __m128i outputVal0;
276
277 2 unsigned int sixteenthPoints = num_points / 16;
278
279
2/2
✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
16384 for (number = 0; number < sixteenthPoints; number++) {
280 16382 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
281 16382 complexVectorPtr += 32;
282
283 16382 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
284 16382 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
285
286 16382 outputVal0 = _mm256_extractf128_si256(complexVal, 0);
287
288 16382 outputVal = _mm256_cvtepi8_epi16(outputVal0);
289 16382 outputVal = _mm256_slli_epi16(outputVal, 7);
290
291 _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal);
292
293 16382 iBufferPtr += 16;
294 }
295
296 2 number = sixteenthPoints * 16;
297
2/2
✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
32 for (; number < num_points; number++) {
298 30 *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
299 30 complexVectorPtr++;
300 }
301 2 }
302 #endif /* LV_HAVE_AVX2 */
303 #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */
304