Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_16ic_s32f_deinterleave_real_32f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Deinterleaves the complex 16-bit vector and returns just the real | ||
16 | * (in-phase) part of the data as a vector of floats, each divided by | ||
17 | * the scalar. | ||
18 | * | ||
19 | * <b>Dispatcher Prototype</b> | ||
20 | * \code | ||
21 | * void volk_16ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_16sc_t* | ||
22 | * complexVector, const float scalar, unsigned int num_points); \endcode | ||
23 | * | ||
24 | * \b Inputs | ||
25 | * \li complexVector: The complex input vector of 16-bit shorts. | ||
26 | * \li scalar: The value by which each sample of the input complex vector is divided. | ||
27 | * \li num_points: The number of complex data values to be deinterleaved. | ||
28 | * | ||
29 | * \b Outputs | ||
30 | * \li iBuffer: The floating point I buffer output data. | ||
31 | * | ||
32 | * \b Example | ||
33 | * \code | ||
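34 | * int N = 10000; | ||
35 | * lv_16sc_t* x = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t) * N, volk_get_alignment()); | ||
36 | * float* t = (float*)volk_malloc(sizeof(float) * N, volk_get_alignment()); | ||
37 | * volk_16ic_s32f_deinterleave_real_32f(t, x, 32768.f, N); // x must hold N input samples | ||
38 | * volk_free(x); | ||
39 | * volk_free(t); | ||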
40 | * \endcode | ||
41 | */ | ||
42 | |||
43 | #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H | ||
44 | #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H | ||
45 | |||
46 | #include <inttypes.h> | ||
47 | #include <stdio.h> | ||
48 | #include <volk/volk_common.h> | ||
49 | |||
50 | #ifdef LV_HAVE_AVX2 | ||
51 | #include <immintrin.h> | ||
52 | |||
53 | static inline void | ||
54 | 2 | volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer, | |
55 | const lv_16sc_t* complexVector, | ||
56 | const float scalar, | ||
57 | unsigned int num_points) | ||
58 | { | ||
59 | 2 | float* iBufferPtr = iBuffer; | |
60 | |||
61 | 2 | unsigned int number = 0; | |
62 | 2 | const unsigned int eighthPoints = num_points / 8; | |
63 | |||
64 | __m256 iFloatValue; | ||
65 | |||
66 | 2 | const float iScalar = 1.0 / scalar; | |
67 | 2 | __m256 invScalar = _mm256_set1_ps(iScalar); | |
68 | __m256i complexVal, iIntVal; | ||
69 | __m128i complexVal128; | ||
70 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
71 | |||
72 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
73 | 0x80, | ||
74 | 0x80, | ||
75 | 0x80, | ||
76 | 0x80, | ||
77 | 0x80, | ||
78 | 0x80, | ||
79 | 0x80, | ||
80 | 13, | ||
81 | 12, | ||
82 | 9, | ||
83 | 8, | ||
84 | 5, | ||
85 | 4, | ||
86 | 1, | ||
87 | 0, | ||
88 | 0x80, | ||
89 | 0x80, | ||
90 | 0x80, | ||
91 | 0x80, | ||
92 | 0x80, | ||
93 | 0x80, | ||
94 | 0x80, | ||
95 | 0x80, | ||
96 | 13, | ||
97 | 12, | ||
98 | 9, | ||
99 | 8, | ||
100 | 5, | ||
101 | 4, | ||
102 | 1, | ||
103 | 0); | ||
104 | |||
105 | 2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times. | 32768 | for (; number < eighthPoints; number++) { |
106 | 32766 | complexVal = _mm256_load_si256((__m256i*)complexVectorPtr); | |
107 | 32766 | complexVectorPtr += 32; | |
108 | 32766 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
109 | 32766 | complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8); | |
110 | 32766 | complexVal128 = _mm256_extracti128_si256(complexVal, 0); | |
111 | |||
112 | 32766 | iIntVal = _mm256_cvtepi16_epi32(complexVal128); | |
113 | 32766 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
114 | |||
115 | 32766 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
116 | |||
117 | _mm256_store_ps(iBufferPtr, iFloatValue); | ||
118 | |||
119 | 32766 | iBufferPtr += 8; | |
120 | } | ||
121 | |||
122 | 2 | number = eighthPoints * 8; | |
123 | 2 | int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number]; | |
124 | 2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times. | 16 | for (; number < num_points; number++) { |
125 | 14 | *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar; | |
126 | 14 | sixteenTComplexVectorPtr++; | |
127 | } | ||
128 | 2 | } | |
129 | #endif /* LV_HAVE_AVX2 */ | ||
130 | |||
131 | #ifdef LV_HAVE_SSE4_1 | ||
132 | #include <smmintrin.h> | ||
133 | |||
134 | static inline void | ||
135 | 2 | volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer, | |
136 | const lv_16sc_t* complexVector, | ||
137 | const float scalar, | ||
138 | unsigned int num_points) | ||
139 | { | ||
140 | 2 | float* iBufferPtr = iBuffer; | |
141 | |||
142 | 2 | unsigned int number = 0; | |
143 | 2 | const unsigned int quarterPoints = num_points / 4; | |
144 | |||
145 | __m128 iFloatValue; | ||
146 | |||
147 | 2 | const float iScalar = 1.0 / scalar; | |
148 | 2 | __m128 invScalar = _mm_set_ps1(iScalar); | |
149 | __m128i complexVal, iIntVal; | ||
150 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
151 | |||
152 | 2 | __m128i moveMask = _mm_set_epi8( | |
153 | 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0); | ||
154 | |||
155 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
156 | 65534 | complexVal = _mm_load_si128((__m128i*)complexVectorPtr); | |
157 | 65534 | complexVectorPtr += 16; | |
158 | 65534 | complexVal = _mm_shuffle_epi8(complexVal, moveMask); | |
159 | |||
160 | 65534 | iIntVal = _mm_cvtepi16_epi32(complexVal); | |
161 | 65534 | iFloatValue = _mm_cvtepi32_ps(iIntVal); | |
162 | |||
163 | 65534 | iFloatValue = _mm_mul_ps(iFloatValue, invScalar); | |
164 | |||
165 | _mm_store_ps(iBufferPtr, iFloatValue); | ||
166 | |||
167 | 65534 | iBufferPtr += 4; | |
168 | } | ||
169 | |||
170 | 2 | number = quarterPoints * 4; | |
171 | 2 | int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number]; | |
172 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
173 | 6 | *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar; | |
174 | 6 | sixteenTComplexVectorPtr++; | |
175 | } | ||
176 | 2 | } | |
177 | #endif /* LV_HAVE_SSE4_1 */ | ||
178 | |||
179 | #ifdef LV_HAVE_SSE | ||
180 | #include <xmmintrin.h> | ||
181 | |||
182 | static inline void | ||
183 | 2 | volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, | |
184 | const lv_16sc_t* complexVector, | ||
185 | const float scalar, | ||
186 | unsigned int num_points) | ||
187 | { | ||
188 | 2 | float* iBufferPtr = iBuffer; | |
189 | |||
190 | 2 | unsigned int number = 0; | |
191 | 2 | const unsigned int quarterPoints = num_points / 4; | |
192 | __m128 iValue; | ||
193 | |||
194 | 2 | const float iScalar = 1.0 / scalar; | |
195 | 2 | __m128 invScalar = _mm_set_ps1(iScalar); | |
196 | 2 | int16_t* complexVectorPtr = (int16_t*)complexVector; | |
197 | |||
198 | __VOLK_ATTR_ALIGNED(16) float floatBuffer[4]; | ||
199 | |||
200 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
201 | 65534 | floatBuffer[0] = (float)(*complexVectorPtr); | |
202 | 65534 | complexVectorPtr += 2; | |
203 | 65534 | floatBuffer[1] = (float)(*complexVectorPtr); | |
204 | 65534 | complexVectorPtr += 2; | |
205 | 65534 | floatBuffer[2] = (float)(*complexVectorPtr); | |
206 | 65534 | complexVectorPtr += 2; | |
207 | 65534 | floatBuffer[3] = (float)(*complexVectorPtr); | |
208 | 65534 | complexVectorPtr += 2; | |
209 | |||
210 | 65534 | iValue = _mm_load_ps(floatBuffer); | |
211 | |||
212 | 65534 | iValue = _mm_mul_ps(iValue, invScalar); | |
213 | |||
214 | _mm_store_ps(iBufferPtr, iValue); | ||
215 | |||
216 | 65534 | iBufferPtr += 4; | |
217 | } | ||
218 | |||
219 | 2 | number = quarterPoints * 4; | |
220 | 2 | complexVectorPtr = (int16_t*)&complexVector[number]; | |
221 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
222 | 6 | *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar; | |
223 | 6 | complexVectorPtr++; | |
224 | } | ||
225 | 2 | } | |
226 | #endif /* LV_HAVE_SSE */ | ||
227 | |||
228 | #ifdef LV_HAVE_GENERIC | ||
229 | static inline void | ||
230 | 2 | volk_16ic_s32f_deinterleave_real_32f_generic(float* iBuffer, | |
231 | const lv_16sc_t* complexVector, | ||
232 | const float scalar, | ||
233 | unsigned int num_points) | ||
234 | { | ||
235 | 2 | unsigned int number = 0; | |
236 | 2 | const int16_t* complexVectorPtr = (const int16_t*)complexVector; | |
237 | 2 | float* iBufferPtr = iBuffer; | |
238 | 2 | const float invScalar = 1.0 / scalar; | |
239 | 2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times. | 262144 | for (number = 0; number < num_points; number++) { |
240 | 262142 | *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar; | |
241 | 262142 | complexVectorPtr++; | |
242 | } | ||
243 | 2 | } | |
244 | #endif /* LV_HAVE_GENERIC */ | ||
245 | |||
246 | |||
247 | #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */ | ||
248 | |||
249 | #ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H | ||
250 | #define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H | ||
251 | |||
252 | #include <inttypes.h> | ||
253 | #include <stdio.h> | ||
254 | #include <volk/volk_common.h> | ||
255 | |||
256 | #ifdef LV_HAVE_AVX2 | ||
257 | #include <immintrin.h> | ||
258 | |||
259 | static inline void | ||
260 | 2 | volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer, | |
261 | const lv_16sc_t* complexVector, | ||
262 | const float scalar, | ||
263 | unsigned int num_points) | ||
264 | { | ||
265 | 2 | float* iBufferPtr = iBuffer; | |
266 | |||
267 | 2 | unsigned int number = 0; | |
268 | 2 | const unsigned int eighthPoints = num_points / 8; | |
269 | |||
270 | __m256 iFloatValue; | ||
271 | |||
272 | 2 | const float iScalar = 1.0 / scalar; | |
273 | 2 | __m256 invScalar = _mm256_set1_ps(iScalar); | |
274 | __m256i complexVal, iIntVal; | ||
275 | __m128i complexVal128; | ||
276 | 2 | int8_t* complexVectorPtr = (int8_t*)complexVector; | |
277 | |||
278 | 2 | __m256i moveMask = _mm256_set_epi8(0x80, | |
279 | 0x80, | ||
280 | 0x80, | ||
281 | 0x80, | ||
282 | 0x80, | ||
283 | 0x80, | ||
284 | 0x80, | ||
285 | 0x80, | ||
286 | 13, | ||
287 | 12, | ||
288 | 9, | ||
289 | 8, | ||
290 | 5, | ||
291 | 4, | ||
292 | 1, | ||
293 | 0, | ||
294 | 0x80, | ||
295 | 0x80, | ||
296 | 0x80, | ||
297 | 0x80, | ||
298 | 0x80, | ||
299 | 0x80, | ||
300 | 0x80, | ||
301 | 0x80, | ||
302 | 13, | ||
303 | 12, | ||
304 | 9, | ||
305 | 8, | ||
306 | 5, | ||
307 | 4, | ||
308 | 1, | ||
309 | 0); | ||
310 | |||
311 | 2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times. | 32768 | for (; number < eighthPoints; number++) { |
312 | 32766 | complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr); | |
313 | 32766 | complexVectorPtr += 32; | |
314 | 32766 | complexVal = _mm256_shuffle_epi8(complexVal, moveMask); | |
315 | 32766 | complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8); | |
316 | 32766 | complexVal128 = _mm256_extracti128_si256(complexVal, 0); | |
317 | |||
318 | 32766 | iIntVal = _mm256_cvtepi16_epi32(complexVal128); | |
319 | 32766 | iFloatValue = _mm256_cvtepi32_ps(iIntVal); | |
320 | |||
321 | 32766 | iFloatValue = _mm256_mul_ps(iFloatValue, invScalar); | |
322 | |||
323 | _mm256_storeu_ps(iBufferPtr, iFloatValue); | ||
324 | |||
325 | 32766 | iBufferPtr += 8; | |
326 | } | ||
327 | |||
328 | 2 | number = eighthPoints * 8; | |
329 | 2 | int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number]; | |
330 | 2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times. | 16 | for (; number < num_points; number++) { |
331 | 14 | *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar; | |
332 | 14 | sixteenTComplexVectorPtr++; | |
333 | } | ||
334 | 2 | } | |
335 | #endif /* LV_HAVE_AVX2 */ | ||
336 | |||
337 | #endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */ | ||
338 |