Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_8ic_s32f_deinterleave_real_32f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Deinterleaves the complex 8-bit char vector into just the real (I) | ||
16 | * vector, converts the samples to floats, and divides the results by | ||
17 | * the scalar factor. | ||
18 | * | ||
19 | * <b>Dispatcher Prototype</b> | ||
20 | * \code | ||
21 | * void volk_8ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_8sc_t* complexVector, | ||
22 | * const float scalar, unsigned int num_points) \endcode | ||
23 | * | ||
24 | * \b Inputs | ||
25 | * \li complexVector: The complex input vector. | ||
26 | * \li scalar: The scalar value used to divide the floating point results. | ||
27 | * \li num_points: The number of complex data values to be deinterleaved. | ||
28 | * | ||
29 | * \b Outputs | ||
30 | * \li iBuffer: The I buffer output data. | ||
31 | * | ||
32 | * \b Example | ||
33 | * \code | ||
34 | * int N = 10000; | ||
35 | * | ||
36 | * volk_8ic_s32f_deinterleave_real_32f(); | ||
37 | * | ||
38 | * volk_free(x); | ||
39 | * \endcode | ||
40 | */ | ||
41 | |||
42 | #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H | ||
43 | #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H | ||
44 | |||
45 | #include <inttypes.h> | ||
46 | #include <stdio.h> | ||
47 | #include <volk/volk_common.h> | ||
48 | |||
49 | #ifdef LV_HAVE_AVX2 | ||
50 | #include <immintrin.h> | ||
51 | |||
52 | static inline void | ||
53 | 2 | volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer, | |
54 | const lv_8sc_t* complexVector, | ||
55 | const float scalar, | ||
56 | unsigned int num_points) | ||
57 | { | ||
58 | 2 | float* iBufferPtr = iBuffer; | |
59 | |||
60 | 2 | unsigned int number = 0; | |
61 | 2 | const unsigned int sixteenthPoints = num_points / 16; | |
62 | __m256 iFloatValue; | ||
63 | |||
64 | 2 | const float iScalar = 1.0 / scalar; | |
65 | 2 | __m256 invScalar = _mm256_set1_ps(iScalar); | |
66 | __m256i complexVal, iIntVal; | ||
67 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
68 | |||
69 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
70 | 0x80, | ||
71 | 0x80, | ||
72 | 0x80, | ||
73 | 0x80, | ||
74 | 0x80, | ||
75 | 0x80, | ||
76 | 0x80, | ||
77 | 14, | ||
78 | 12, | ||
79 | 10, | ||
80 | 8, | ||
81 | 6, | ||
82 | 4, | ||
83 | 2, | ||
84 | 0, | ||
85 | 0x80, | ||
86 | 0x80, | ||
87 | 0x80, | ||
88 | 0x80, | ||
89 | 0x80, | ||
90 | 0x80, | ||
91 | 0x80, | ||
92 | 0x80, | ||
93 | 14, | ||
94 | 12, | ||
95 | 10, | ||
96 | 8, | ||
97 | 6, | ||
98 | 4, | ||
99 | 2, | ||
100 | 0); | ||
101 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (; number < sixteenthPoints; number++) { |
102 | 16382 | complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); | |
103 | 16382 | complexVectorPtr += 32; | |
104 | 16382 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
105 | |||
106 | 32764 | iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal)); | |
107 | 16382 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
108 | 16382 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
109 | _mm256_store_ps(iBufferPtr, iFloatValue); | ||
110 | 16382 | iBufferPtr += 8; | |
111 | |||
112 | 16382 | complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110); | |
113 | 32764 | iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal)); | |
114 | 16382 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
115 | 16382 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
116 | _mm256_store_ps(iBufferPtr, iFloatValue); | ||
117 | 16382 | iBufferPtr += 8; | |
118 | } | ||
119 | |||
120 | 2 | number = sixteenthPoints * 16; | |
121 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
122 | 30 | *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar; | |
123 | 30 | complexVectorPtr++; | |
124 | } | ||
125 | 2 | } | |
126 | #endif /* LV_HAVE_AVX2 */ | ||
127 | |||
128 | |||
129 | #ifdef LV_HAVE_SSE4_1 | ||
130 | #include <smmintrin.h> | ||
131 | |||
132 | static inline void | ||
133 | 2 | volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, | |
134 | const lv_8sc_t* complexVector, | ||
135 | const float scalar, | ||
136 | unsigned int num_points) | ||
137 | { | ||
138 | 2 | float* iBufferPtr = iBuffer; | |
139 | |||
140 | 2 | unsigned int number = 0; | |
141 | 2 | const unsigned int eighthPoints = num_points / 8; | |
142 | __m128 iFloatValue; | ||
143 | |||
144 | 2 | const float iScalar = 1.0 / scalar; | |
145 | 2 | __m128 invScalar = _mm_set_ps1(iScalar); | |
146 | __m128i complexVal, iIntVal; | ||
147 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
148 | |||
149 | 2 | __m128i moveMask = _mm_set_epi8( | |
150 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0); | ||
151 | |||
152 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < eighthPoints; number++) { |
153 | 32766 | complexVal = _mm_load_si128((__m128i*)complexVectorPtr); | |
154 | 32766 | complexVectorPtr += 16; | |
155 | 32766 | complexVal = _mm_shuffle_epi8(complexVal, moveMask); | |
156 | |||
157 | 32766 | iIntVal = _mm_cvtepi8_epi32(complexVal); | |
158 | 32766 | iFloatValue = _mm_cvtepi32_ps(iIntVal); | |
159 | |||
160 | 32766 | iFloatValue = _mm_mul_ps(iFloatValue, invScalar); | |
161 | |||
162 | _mm_store_ps(iBufferPtr, iFloatValue); | ||
163 | |||
164 | 32766 | iBufferPtr += 4; | |
165 | |||
166 | 32766 | complexVal = _mm_srli_si128(complexVal, 4); | |
167 | 32766 | iIntVal = _mm_cvtepi8_epi32(complexVal); | |
168 | 32766 | iFloatValue = _mm_cvtepi32_ps(iIntVal); | |
169 | |||
170 | 32766 | iFloatValue = _mm_mul_ps(iFloatValue, invScalar); | |
171 | |||
172 | _mm_store_ps(iBufferPtr, iFloatValue); | ||
173 | |||
174 | 32766 | iBufferPtr += 4; | |
175 | } | ||
176 | |||
177 | 2 | number = eighthPoints * 8; | |
178 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
179 | 14 | *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar; | |
180 | 14 | complexVectorPtr++; | |
181 | } | ||
182 | 2 | } | |
183 | #endif /* LV_HAVE_SSE4_1 */ | ||
184 | |||
185 | |||
186 | #ifdef LV_HAVE_SSE | ||
187 | #include <xmmintrin.h> | ||
188 | |||
189 | static inline void | ||
190 | 2 | volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, | |
191 | const lv_8sc_t* complexVector, | ||
192 | const float scalar, | ||
193 | unsigned int num_points) | ||
194 | { | ||
195 | 2 | float* iBufferPtr = iBuffer; | |
196 | |||
197 | 2 | unsigned int number = 0; | |
198 | 2 | const unsigned int quarterPoints = num_points / 4; | |
199 | __m128 iValue; | ||
200 | |||
201 | 2 | const float iScalar = 1.0 / scalar; | |
202 | 2 | __m128 invScalar = _mm_set_ps1(iScalar); | |
203 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
204 | |||
205 | __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; | ||
206 | |||
207 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
208 | 65534 | floatBuffer[0] = (float)(*complexVectorPtr); | |
209 | 65534 | complexVectorPtr += 2; | |
210 | 65534 | floatBuffer[1] = (float)(*complexVectorPtr); | |
211 | 65534 | complexVectorPtr += 2; | |
212 | 65534 | floatBuffer[2] = (float)(*complexVectorPtr); | |
213 | 65534 | complexVectorPtr += 2; | |
214 | 65534 | floatBuffer[3] = (float)(*complexVectorPtr); | |
215 | 65534 | complexVectorPtr += 2; | |
216 | |||
217 | 65534 | iValue = _mm_load_ps(floatBuffer); | |
218 | |||
219 | 65534 | iValue = _mm_mul_ps(iValue, invScalar); | |
220 | |||
221 | _mm_store_ps(iBufferPtr, iValue); | ||
222 | |||
223 | 65534 | iBufferPtr += 4; | |
224 | } | ||
225 | |||
226 | 2 | number = quarterPoints * 4; | |
227 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
228 | 6 | *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar; | |
229 | 6 | complexVectorPtr++; | |
230 | } | ||
231 | 2 | } | |
232 | #endif /* LV_HAVE_SSE */ | ||
233 | |||
234 | |||
235 | #ifdef LV_HAVE_GENERIC | ||
236 | |||
237 | static inline void | ||
238 | 2 | volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer, | |
239 | const lv_8sc_t* complexVector, | ||
240 | const float scalar, | ||
241 | unsigned int num_points) | ||
242 | { | ||
243 | 2 | unsigned int number = 0; | |
244 | 2 | const int8_t* complexVectorPtr = (const int8_t*)complexVector; | |
245 | 2 | float* iBufferPtr = iBuffer; | |
246 | 2 | const float invScalar = 1.0 / scalar; | |
247 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
248 | 262142 | *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar; | |
249 | 262142 | complexVectorPtr++; | |
250 | } | ||
251 | 2 | } | |
252 | #endif /* LV_HAVE_GENERIC */ | ||
253 | |||
254 | |||
255 | #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */ | ||
256 | |||
257 | #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H | ||
258 | #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H | ||
259 | |||
260 | #include <inttypes.h> | ||
261 | #include <stdio.h> | ||
262 | #include <volk/volk_common.h> | ||
263 | |||
264 | #ifdef LV_HAVE_AVX2 | ||
265 | #include <immintrin.h> | ||
266 | |||
267 | static inline void | ||
268 | 2 | volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, | |
269 | const lv_8sc_t* complexVector, | ||
270 | const float scalar, | ||
271 | unsigned int num_points) | ||
272 | { | ||
273 | 2 | float* iBufferPtr = iBuffer; | |
274 | |||
275 | 2 | unsigned int number = 0; | |
276 | 2 | const unsigned int sixteenthPoints = num_points / 16; | |
277 | __m256 iFloatValue; | ||
278 | |||
279 | 2 | const float iScalar = 1.0 / scalar; | |
280 | 2 | __m256 invScalar = _mm256_set1_ps(iScalar); | |
281 | __m256i complexVal, iIntVal; | ||
282 | __m128i hcomplexVal; | ||
283 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
284 | |||
285 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
286 | 0x80, | ||
287 | 0x80, | ||
288 | 0x80, | ||
289 | 0x80, | ||
290 | 0x80, | ||
291 | 0x80, | ||
292 | 0x80, | ||
293 | 14, | ||
294 | 12, | ||
295 | 10, | ||
296 | 8, | ||
297 | 6, | ||
298 | 4, | ||
299 | 2, | ||
300 | 0, | ||
301 | 0x80, | ||
302 | 0x80, | ||
303 | 0x80, | ||
304 | 0x80, | ||
305 | 0x80, | ||
306 | 0x80, | ||
307 | 0x80, | ||
308 | 0x80, | ||
309 | 14, | ||
310 | 12, | ||
311 | 10, | ||
312 | 8, | ||
313 | 6, | ||
314 | 4, | ||
315 | 2, | ||
316 | 0); | ||
317 | |||
318 |
2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.
|
16384 | for (; number < sixteenthPoints; number++) { |
319 | 16382 | complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); | |
320 | 16382 | complexVectorPtr += 32; | |
321 | 16382 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
322 | |||
323 | 16382 | hcomplexVal = _mm256_extracti128_si256(complexVal, 0); | |
324 | 16382 | iIntVal = _mm256_cvtepi8_epi32(hcomplexVal); | |
325 | 16382 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
326 | |||
327 | 16382 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
328 | |||
329 | _mm256_storeu_ps(iBufferPtr, iFloatValue); | ||
330 | |||
331 | 16382 | iBufferPtr += 8; | |
332 | |||
333 | 16382 | hcomplexVal = _mm256_extracti128_si256(complexVal, 1); | |
334 | 16382 | iIntVal = _mm256_cvtepi8_epi32(hcomplexVal); | |
335 | 16382 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
336 | |||
337 | 16382 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
338 | |||
339 | _mm256_storeu_ps(iBufferPtr, iFloatValue); | ||
340 | |||
341 | 16382 | iBufferPtr += 8; | |
342 | } | ||
343 | |||
344 | 2 | number = sixteenthPoints * 16; | |
345 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.
|
32 | for (; number < num_points; number++) { |
346 | 30 | *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar; | |
347 | 30 | complexVectorPtr++; | |
348 | } | ||
349 | 2 | } | |
350 | #endif /* LV_HAVE_AVX2 */ | ||
351 | |||
352 | |||
353 | #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */ | ||
354 |