Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_16ic_deinterleave_real_16i | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Deinterleaves the complex 16 bit vector and returns the real (inphase) part of the | ||
16 | * signal. | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_16ic_deinterleave_real_16i(int16_t* iBuffer, const lv_16sc_t* complexVector, | ||
21 | * unsigned int num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li complexVector: The complex input vector. | ||
25 | * \li num_points: The number of complex data values to be deinterleaved. | ||
26 | * | ||
27 | * \b Outputs | ||
28 | * \li iBuffer: The I buffer output data. | ||
29 | * | ||
30 | * \b Example | ||
31 | * \code | ||
32 | * int N = 10000; | ||
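33 | * lv_16sc_t* x = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t) * N, volk_get_alignment()); | ||
34 | * int16_t* t = (int16_t*)volk_malloc(sizeof(int16_t) * N, volk_get_alignment()); | ||
35 | * volk_16ic_deinterleave_real_16i(t, x, N); // x must already hold N input samples | ||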
36 | * volk_free(x); | ||
37 | * volk_free(t); | ||
38 | * \endcode | ||
39 | */ | ||
40 | |||
41 | #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H | ||
42 | #define INCLUDED_volk_16ic_deinterleave_real_16i_a_H | ||
43 | |||
44 | #include <inttypes.h> | ||
45 | #include <stdio.h> | ||
46 | |||
47 | |||
48 | #ifdef LV_HAVE_AVX2 | ||
49 | #include <immintrin.h> | ||
50 | |||
51 | 2 | static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer, | |
52 | const lv_16sc_t* complexVector, | ||
53 | unsigned int num_points) | ||
54 | { | ||
55 | 2 | unsigned int number = 0; | |
56 | 2 | const int16_t* complexVectorPtr = (int16_t*)complexVector; | |
57 | 2 | int16_t* iBufferPtr = iBuffer; | |
58 | |||
59 | 2 | __m256i iMoveMask1 = _mm256_set_epi8(0x80, | |
60 | 0x80, | ||
61 | 0x80, | ||
62 | 0x80, | ||
63 | 0x80, | ||
64 | 0x80, | ||
65 | 0x80, | ||
66 | 0x80, | ||
67 | 13, | ||
68 | 12, | ||
69 | 9, | ||
70 | 8, | ||
71 | 5, | ||
72 | 4, | ||
73 | 1, | ||
74 | 0, | ||
75 | 0x80, | ||
76 | 0x80, | ||
77 | 0x80, | ||
78 | 0x80, | ||
79 | 0x80, | ||
80 | 0x80, | ||
81 | 0x80, | ||
82 | 0x80, | ||
83 | 13, | ||
84 | 12, | ||
85 | 9, | ||
86 | 8, | ||
87 | 5, | ||
88 | 4, | ||
89 | 1, | ||
90 | 0); | ||
91 | 2 | __m256i iMoveMask2 = _mm256_set_epi8(13, | |
92 | 12, | ||
93 | 9, | ||
94 | 8, | ||
95 | 5, | ||
96 | 4, | ||
97 | 1, | ||
98 | 0, | ||
99 | 0x80, | ||
100 | 0x80, | ||
101 | 0x80, | ||
102 | 0x80, | ||
103 | 0x80, | ||
104 | 0x80, | ||
105 | 0x80, | ||
106 | 0x80, | ||
107 | 13, | ||
108 | 12, | ||
109 | 9, | ||
110 | 8, | ||
111 | 5, | ||
112 | 4, | ||
113 | 1, | ||
114 | 0, | ||
115 | 0x80, | ||
116 | 0x80, | ||
117 | 0x80, | ||
118 | 0x80, | ||
119 | 0x80, | ||
120 | 0x80, | ||
121 | 0x80, | ||
122 | 0x80); | ||
123 | |||
124 | __m256i complexVal1, complexVal2, iOutputVal; | ||
125 | |||
126 | 2 | unsigned int sixteenthPoints = num_points / 16; | |
127 | |||
128 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (number = 0; number < sixteenthPoints; number++) { |
129 | 16382 | complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr); | |
130 | 16382 | complexVectorPtr += 16; | |
131 | 16382 | complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr); | |
132 | 16382 | complexVectorPtr += 16; | |
133 | |||
134 | 16382 | complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1); | |
135 | 16382 | complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2); | |
136 | |||
137 | 16382 | iOutputVal = _mm256_or_si256(complexVal1, complexVal2); | |
138 | 16382 | iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8); | |
139 | |||
140 | _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal); | ||
141 | |||
142 | 16382 | iBufferPtr += 16; | |
143 | } | ||
144 | |||
145 | 2 | number = sixteenthPoints * 16; | |
146 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
147 | 30 | *iBufferPtr++ = *complexVectorPtr++; | |
148 | 30 | complexVectorPtr++; | |
149 | } | ||
150 | 2 | } | |
151 | #endif /* LV_HAVE_AVX2 */ | ||
152 | |||
153 | #ifdef LV_HAVE_SSSE3 | ||
154 | #include <tmmintrin.h> | ||
155 | |||
156 | 2 | static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer, | |
157 | const lv_16sc_t* complexVector, | ||
158 | unsigned int num_points) | ||
159 | { | ||
160 | 2 | unsigned int number = 0; | |
161 | 2 | const int16_t* complexVectorPtr = (int16_t*)complexVector; | |
162 | 2 | int16_t* iBufferPtr = iBuffer; | |
163 | |||
164 | 2 | __m128i iMoveMask1 = _mm_set_epi8( | |
165 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0); | ||
166 | 2 | __m128i iMoveMask2 = _mm_set_epi8( | |
167 | 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80); | ||
168 | |||
169 | __m128i complexVal1, complexVal2, iOutputVal; | ||
170 | |||
171 | 2 | unsigned int eighthPoints = num_points / 8; | |
172 | |||
173 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (number = 0; number < eighthPoints; number++) { |
174 | 32766 | complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); | |
175 | 32766 | complexVectorPtr += 8; | |
176 | 32766 | complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); | |
177 | 32766 | complexVectorPtr += 8; | |
178 | |||
179 | 32766 | complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1); | |
180 | 32766 | complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2); | |
181 | |||
182 | 32766 | iOutputVal = _mm_or_si128(complexVal1, complexVal2); | |
183 | |||
184 | _mm_store_si128((__m128i*)iBufferPtr, iOutputVal); | ||
185 | |||
186 | 32766 | iBufferPtr += 8; | |
187 | } | ||
188 | |||
189 | 2 | number = eighthPoints * 8; | |
190 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
191 | 14 | *iBufferPtr++ = *complexVectorPtr++; | |
192 | 14 | complexVectorPtr++; | |
193 | } | ||
194 | 2 | } | |
195 | #endif /* LV_HAVE_SSSE3 */ | ||
196 | |||
197 | |||
198 | #ifdef LV_HAVE_SSE2 | ||
199 | #include <emmintrin.h> | ||
200 | |||
201 | 2 | static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, | |
202 | const lv_16sc_t* complexVector, | ||
203 | unsigned int num_points) | ||
204 | { | ||
205 | 2 | unsigned int number = 0; | |
206 | 2 | const int16_t* complexVectorPtr = (int16_t*)complexVector; | |
207 | 2 | int16_t* iBufferPtr = iBuffer; | |
208 | __m128i complexVal1, complexVal2, iOutputVal; | ||
209 | 2 | __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF); | |
210 | 2 | __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0); | |
211 | |||
212 | 2 | unsigned int eighthPoints = num_points / 8; | |
213 | |||
214 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (number = 0; number < eighthPoints; number++) { |
215 | 32766 | complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr); | |
216 | 32766 | complexVectorPtr += 8; | |
217 | 32766 | complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr); | |
218 | 32766 | complexVectorPtr += 8; | |
219 | |||
220 | 32766 | complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0)); | |
221 | |||
222 | 32766 | complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3, 1, 2, 0)); | |
223 | |||
224 | 32766 | complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3, 1, 2, 0)); | |
225 | |||
226 | 32766 | complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0)); | |
227 | |||
228 | 32766 | complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3, 1, 2, 0)); | |
229 | |||
230 | 32766 | complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2, 0, 3, 1)); | |
231 | |||
232 | 98298 | iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask), | |
233 | _mm_and_si128(complexVal2, highMask)); | ||
234 | |||
235 | _mm_store_si128((__m128i*)iBufferPtr, iOutputVal); | ||
236 | |||
237 | 32766 | iBufferPtr += 8; | |
238 | } | ||
239 | |||
240 | 2 | number = eighthPoints * 8; | |
241 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
242 | 14 | *iBufferPtr++ = *complexVectorPtr++; | |
243 | 14 | complexVectorPtr++; | |
244 | } | ||
245 | 2 | } | |
246 | #endif /* LV_HAVE_SSE2 */ | ||
247 | |||
248 | #ifdef LV_HAVE_GENERIC | ||
249 | |||
250 | 2 | static inline void volk_16ic_deinterleave_real_16i_generic(int16_t* iBuffer, | |
251 | const lv_16sc_t* complexVector, | ||
252 | unsigned int num_points) | ||
253 | { | ||
254 | 2 | unsigned int number = 0; | |
255 | 2 | const int16_t* complexVectorPtr = (int16_t*)complexVector; | |
256 | 2 | int16_t* iBufferPtr = iBuffer; | |
257 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
258 | 262142 | *iBufferPtr++ = *complexVectorPtr++; | |
259 | 262142 | complexVectorPtr++; | |
260 | } | ||
261 | 2 | } | |
262 | #endif /* LV_HAVE_GENERIC */ | ||
263 | |||
264 | |||
265 | #endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */ | ||
266 | |||
267 | |||
268 | #ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H | ||
269 | #define INCLUDED_volk_16ic_deinterleave_real_16i_u_H | ||
270 | |||
271 | #include <inttypes.h> | ||
272 | #include <stdio.h> | ||
273 | |||
274 | |||
275 | #ifdef LV_HAVE_AVX2 | ||
276 | #include <immintrin.h> | ||
277 | |||
278 | 2 | static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer, | |
279 | const lv_16sc_t* complexVector, | ||
280 | unsigned int num_points) | ||
281 | { | ||
282 | 2 | unsigned int number = 0; | |
283 | 2 | const int16_t* complexVectorPtr = (int16_t*)complexVector; | |
284 | 2 | int16_t* iBufferPtr = iBuffer; | |
285 | |||
286 | 2 | __m256i iMoveMask1 = _mm256_set_epi8(0x80, | |
287 | 0x80, | ||
288 | 0x80, | ||
289 | 0x80, | ||
290 | 0x80, | ||
291 | 0x80, | ||
292 | 0x80, | ||
293 | 0x80, | ||
294 | 13, | ||
295 | 12, | ||
296 | 9, | ||
297 | 8, | ||
298 | 5, | ||
299 | 4, | ||
300 | 1, | ||
301 | 0, | ||
302 | 0x80, | ||
303 | 0x80, | ||
304 | 0x80, | ||
305 | 0x80, | ||
306 | 0x80, | ||
307 | 0x80, | ||
308 | 0x80, | ||
309 | 0x80, | ||
310 | 13, | ||
311 | 12, | ||
312 | 9, | ||
313 | 8, | ||
314 | 5, | ||
315 | 4, | ||
316 | 1, | ||
317 | 0); | ||
318 | 2 | __m256i iMoveMask2 = _mm256_set_epi8(13, | |
319 | 12, | ||
320 | 9, | ||
321 | 8, | ||
322 | 5, | ||
323 | 4, | ||
324 | 1, | ||
325 | 0, | ||
326 | 0x80, | ||
327 | 0x80, | ||
328 | 0x80, | ||
329 | 0x80, | ||
330 | 0x80, | ||
331 | 0x80, | ||
332 | 0x80, | ||
333 | 0x80, | ||
334 | 13, | ||
335 | 12, | ||
336 | 9, | ||
337 | 8, | ||
338 | 5, | ||
339 | 4, | ||
340 | 1, | ||
341 | 0, | ||
342 | 0x80, | ||
343 | 0x80, | ||
344 | 0x80, | ||
345 | 0x80, | ||
346 | 0x80, | ||
347 | 0x80, | ||
348 | 0x80, | ||
349 | 0x80); | ||
350 | |||
351 | __m256i complexVal1, complexVal2, iOutputVal; | ||
352 | |||
353 | 2 | unsigned int sixteenthPoints = num_points / 16; | |
354 | |||
355 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (number = 0; number < sixteenthPoints; number++) { |
356 | 16382 | complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr); | |
357 | 16382 | complexVectorPtr += 16; | |
358 | 16382 | complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr); | |
359 | 16382 | complexVectorPtr += 16; | |
360 | |||
361 | 16382 | complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1); | |
362 | 16382 | complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2); | |
363 | |||
364 | 16382 | iOutputVal = _mm256_or_si256(complexVal1, complexVal2); | |
365 | 16382 | iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8); | |
366 | |||
367 | _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal); | ||
368 | |||
369 | 16382 | iBufferPtr += 16; | |
370 | } | ||
371 | |||
372 | 2 | number = sixteenthPoints * 16; | |
373 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
374 | 30 | *iBufferPtr++ = *complexVectorPtr++; | |
375 | 30 | complexVectorPtr++; | |
376 | } | ||
377 | 2 | } | |
378 | #endif /* LV_HAVE_AVX2 */ | ||
379 | |||
380 | #endif /* INCLUDED_volk_16ic_deinterleave_real_16i_u_H */ | ||
381 |