Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_8ic_deinterleave_real_16i | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Deinterleaves the complex 8-bit char vector into just the I (real) | ||
16 | * vector and converts it to 16-bit shorts, scaling each sample by 128. | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_8ic_deinterleave_real_16i(int16_t* iBuffer, const lv_8sc_t* complexVector, | ||
21 | * unsigned int num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li complexVector: The complex input vector. | ||
25 | * \li num_points: The number of complex data values to be deinterleaved. | ||
26 | * | ||
27 | * \b Outputs | ||
28 | * \li iBuffer: The I buffer output data. | ||
29 | * | ||
30 | * \b Example | ||
31 | * \code | ||
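32 | * int N = 10000; | ||
33 | * lv_8sc_t* in = (lv_8sc_t*)volk_malloc(sizeof(lv_8sc_t) * N, volk_get_alignment()); | ||
34 | * int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t) * N, volk_get_alignment()); | ||
35 | * volk_8ic_deinterleave_real_16i(out, in, N); // fill `in` with samples first | ||
36 | * volk_free(in); volk_free(out); | ||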
37 | * \endcode | ||
38 | */ | ||
39 | |||
40 | #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H | ||
41 | #define INCLUDED_volk_8ic_deinterleave_real_16i_a_H | ||
42 | |||
43 | #include <inttypes.h> | ||
44 | #include <stdio.h> | ||
45 | |||
46 | |||
47 | #ifdef LV_HAVE_AVX2 | ||
48 | #include <immintrin.h> | ||
49 | |||
50 | 2 | static inline void volk_8ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer, | |
51 | const lv_8sc_t* complexVector, | ||
52 | unsigned int num_points) | ||
53 | { | ||
54 | 2 | unsigned int number = 0; | |
55 | 2 | const int8_t* complexVectorPtr = (int8_t*)complexVector; | |
56 | 2 | int16_t* iBufferPtr = iBuffer; | |
57 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
58 | 0x80, | ||
59 | 0x80, | ||
60 | 0x80, | ||
61 | 0x80, | ||
62 | 0x80, | ||
63 | 0x80, | ||
64 | 0x80, | ||
65 | 14, | ||
66 | 12, | ||
67 | 10, | ||
68 | 8, | ||
69 | 6, | ||
70 | 4, | ||
71 | 2, | ||
72 | 0, | ||
73 | 0x80, | ||
74 | 0x80, | ||
75 | 0x80, | ||
76 | 0x80, | ||
77 | 0x80, | ||
78 | 0x80, | ||
79 | 0x80, | ||
80 | 0x80, | ||
81 | 14, | ||
82 | 12, | ||
83 | 10, | ||
84 | 8, | ||
85 | 6, | ||
86 | 4, | ||
87 | 2, | ||
88 | 0); | ||
89 | __m256i complexVal, outputVal; | ||
90 | __m128i outputVal0; | ||
91 | |||
92 | 2 | unsigned int sixteenthPoints = num_points / 16; | |
93 | |||
94 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (number = 0; number < sixteenthPoints; number++) { |
95 | 16382 | complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); | |
96 | 16382 | complexVectorPtr += 32; | |
97 | |||
98 | 16382 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
99 | 16382 | complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8); | |
100 | |||
101 | 16382 | outputVal0 = _mm256_extractf128_si256(complexVal, 0); | |
102 | |||
103 | 16382 | outputVal = _mm256_cvtepi8_epi16(outputVal0); | |
104 | 16382 | outputVal = _mm256_slli_epi16(outputVal, 7); | |
105 | |||
106 | _mm256_store_si256((__m256i*)iBufferPtr, outputVal); | ||
107 | |||
108 | 16382 | iBufferPtr += 16; | |
109 | } | ||
110 | |||
111 | 2 | number = sixteenthPoints * 16; | |
112 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
113 | 30 | *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128; | |
114 | 30 | complexVectorPtr++; | |
115 | } | ||
116 | 2 | } | |
117 | #endif /* LV_HAVE_AVX2 */ | ||
118 | |||
119 | #ifdef LV_HAVE_SSE4_1 | ||
120 | #include <smmintrin.h> | ||
121 | |||
122 | 2 | static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, | |
123 | const lv_8sc_t* complexVector, | ||
124 | unsigned int num_points) | ||
125 | { | ||
126 | 2 | unsigned int number = 0; | |
127 | 2 | const int8_t* complexVectorPtr = (int8_t*)complexVector; | |
128 | 2 | int16_t* iBufferPtr = iBuffer; | |
129 | 2 | __m128i moveMask = _mm_set_epi8( | |
130 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); | ||
131 | __m128i complexVal, outputVal; | ||
132 | |||
133 | 2 | unsigned int eighthPoints = num_points / 8; | |
134 | |||
135 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (number = 0; number < eighthPoints; number++) { |
136 | 32766 | complexVal = _mm_load_si128((__m128i*)complexVectorPtr); | |
137 | 32766 | complexVectorPtr += 16; | |
138 | |||
139 | 32766 | complexVal = _mm_shuffle_epi8(complexVal, moveMask); | |
140 | |||
141 | 32766 | outputVal = _mm_cvtepi8_epi16(complexVal); | |
142 | 32766 | outputVal = _mm_slli_epi16(outputVal, 7); | |
143 | |||
144 | _mm_store_si128((__m128i*)iBufferPtr, outputVal); | ||
145 | 32766 | iBufferPtr += 8; | |
146 | } | ||
147 | |||
148 | 2 | number = eighthPoints * 8; | |
149 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
150 | 14 | *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128; | |
151 | 14 | complexVectorPtr++; | |
152 | } | ||
153 | 2 | } | |
154 | #endif /* LV_HAVE_SSE4_1 */ | ||
155 | |||
156 | |||
157 | #ifdef LV_HAVE_AVX | ||
158 | #include <immintrin.h> | ||
159 | |||
160 | 2 | static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer, | |
161 | const lv_8sc_t* complexVector, | ||
162 | unsigned int num_points) | ||
163 | { | ||
164 | 2 | unsigned int number = 0; | |
165 | 2 | const int8_t* complexVectorPtr = (int8_t*)complexVector; | |
166 | 2 | int16_t* iBufferPtr = iBuffer; | |
167 | 2 | __m128i moveMask = _mm_set_epi8( | |
168 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); | ||
169 | __m256i complexVal, outputVal; | ||
170 | __m128i complexVal1, complexVal0, outputVal1, outputVal0; | ||
171 | |||
172 | 2 | unsigned int sixteenthPoints = num_points / 16; | |
173 | |||
174 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (number = 0; number < sixteenthPoints; number++) { |
175 | 16382 | complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); | |
176 | 16382 | complexVectorPtr += 32; | |
177 | |||
178 | 16382 | complexVal1 = _mm256_extractf128_si256(complexVal, 1); | |
179 | 16382 | complexVal0 = _mm256_extractf128_si256(complexVal, 0); | |
180 | |||
181 | 16382 | outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask); | |
182 | 16382 | outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask); | |
183 | |||
184 | 16382 | outputVal1 = _mm_cvtepi8_epi16(outputVal1); | |
185 | 16382 | outputVal1 = _mm_slli_epi16(outputVal1, 7); | |
186 | 16382 | outputVal0 = _mm_cvtepi8_epi16(outputVal0); | |
187 | 16382 | outputVal0 = _mm_slli_epi16(outputVal0, 7); | |
188 | |||
189 | 16382 | __m256i dummy = _mm256_setzero_si256(); | |
190 | 16382 | outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0); | |
191 | 16382 | outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1); | |
192 | _mm256_store_si256((__m256i*)iBufferPtr, outputVal); | ||
193 | |||
194 | 16382 | iBufferPtr += 16; | |
195 | } | ||
196 | |||
197 | 2 | number = sixteenthPoints * 16; | |
198 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
199 | 30 | *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128; | |
200 | 30 | complexVectorPtr++; | |
201 | } | ||
202 | 2 | } | |
203 | #endif /* LV_HAVE_AVX */ | ||
204 | |||
205 | |||
206 | #ifdef LV_HAVE_GENERIC | ||
207 | |||
208 | 2 | static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer, | |
209 | const lv_8sc_t* complexVector, | ||
210 | unsigned int num_points) | ||
211 | { | ||
212 | 2 | unsigned int number = 0; | |
213 | 2 | const int8_t* complexVectorPtr = (const int8_t*)complexVector; | |
214 | 2 | int16_t* iBufferPtr = iBuffer; | |
215 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
216 | 262142 | *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128; | |
217 | 262142 | complexVectorPtr++; | |
218 | } | ||
219 | 2 | } | |
220 | #endif /* LV_HAVE_GENERIC */ | ||
221 | |||
222 | |||
223 | #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */ | ||
224 | |||
225 | #ifndef INCLUDED_volk_8ic_deinterleave_real_16i_u_H | ||
226 | #define INCLUDED_volk_8ic_deinterleave_real_16i_u_H | ||
227 | |||
228 | #include <inttypes.h> | ||
229 | #include <stdio.h> | ||
230 | |||
231 | |||
232 | #ifdef LV_HAVE_AVX2 | ||
233 | #include <immintrin.h> | ||
234 | |||
235 | 2 | static inline void volk_8ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer, | |
236 | const lv_8sc_t* complexVector, | ||
237 | unsigned int num_points) | ||
238 | { | ||
239 | 2 | unsigned int number = 0; | |
240 | 2 | const int8_t* complexVectorPtr = (int8_t*)complexVector; | |
241 | 2 | int16_t* iBufferPtr = iBuffer; | |
242 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
243 | 0x80, | ||
244 | 0x80, | ||
245 | 0x80, | ||
246 | 0x80, | ||
247 | 0x80, | ||
248 | 0x80, | ||
249 | 0x80, | ||
250 | 14, | ||
251 | 12, | ||
252 | 10, | ||
253 | 8, | ||
254 | 6, | ||
255 | 4, | ||
256 | 2, | ||
257 | 0, | ||
258 | 0x80, | ||
259 | 0x80, | ||
260 | 0x80, | ||
261 | 0x80, | ||
262 | 0x80, | ||
263 | 0x80, | ||
264 | 0x80, | ||
265 | 0x80, | ||
266 | 14, | ||
267 | 12, | ||
268 | 10, | ||
269 | 8, | ||
270 | 6, | ||
271 | 4, | ||
272 | 2, | ||
273 | 0); | ||
274 | __m256i complexVal, outputVal; | ||
275 | __m128i outputVal0; | ||
276 | |||
277 | 2 | unsigned int sixteenthPoints = num_points / 16; | |
278 | |||
279 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (number = 0; number < sixteenthPoints; number++) { |
280 | 16382 | complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); | |
281 | 16382 | complexVectorPtr += 32; | |
282 | |||
283 | 16382 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
284 | 16382 | complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8); | |
285 | |||
286 | 16382 | outputVal0 = _mm256_extractf128_si256(complexVal, 0); | |
287 | |||
288 | 16382 | outputVal = _mm256_cvtepi8_epi16(outputVal0); | |
289 | 16382 | outputVal = _mm256_slli_epi16(outputVal, 7); | |
290 | |||
291 | _mm256_storeu_si256((__m256i*)iBufferPtr, outputVal); | ||
292 | |||
293 | 16382 | iBufferPtr += 16; | |
294 | } | ||
295 | |||
296 | 2 | number = sixteenthPoints * 16; | |
297 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
298 | 30 | *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128; | |
299 | 30 | complexVectorPtr++; | |
300 | } | ||
301 | 2 | } | |
302 | #endif /* LV_HAVE_AVX2 */ | ||
303 | #endif /* INCLUDED_volk_8ic_deinterleave_real_16i_u_H */ | ||
304 |