Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32fc_deinterleave_64f_x2 | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Deinterleaves the complex floating-point vector into separate I and Q | ||
16 | * vectors. Each output sample is converted to double precision. | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_32fc_deinterleave_64f_x2(double* iBuffer, double* qBuffer, const | ||
21 | * lv_32fc_t* complexVector, unsigned int num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li complexVector: The complex input vector. | ||
25 | * \li num_points: The number of complex data values to be deinterleaved. | ||
26 | * | ||
27 | * \b Outputs | ||
28 | * \li iBuffer: The I buffer output data. | ||
29 | * \li qBuffer: The Q buffer output data. | ||
30 | * | ||
31 | * \b Example | ||
32 | * Generate complex numbers around the top half of the unit circle and | ||
33 | * deinterleave them into real and imaginary double buffers. | ||
34 | * \code | ||
35 | * int N = 10; | ||
36 | * unsigned int alignment = volk_get_alignment(); | ||
37 | * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment); | ||
38 | * double* re = (double*)volk_malloc(sizeof(double)*N, alignment); | ||
39 | * double* im = (double*)volk_malloc(sizeof(double)*N, alignment); | ||
40 | * | ||
41 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
42 | * float real = 2.f * ((float)ii / (float)N) - 1.f; | ||
43 | * float imag = std::sqrt(1.f - real * real); | ||
44 | * in[ii] = lv_cmake(real, imag); | ||
45 | * } | ||
46 | * | ||
47 | * volk_32fc_deinterleave_64f_x2(re, im, in, N); | ||
48 | * | ||
49 | * printf(" re | im\n"); | ||
50 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
51 | * printf("out(%i) = %+.1g | %+.1g\n", ii, re[ii], im[ii]); | ||
52 | * } | ||
53 | * | ||
54 | * volk_free(in); | ||
55 | * volk_free(re); | ||
56 | * volk_free(im); | ||
57 | * \endcode | ||
58 | */ | ||
59 | |||
60 | #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H | ||
61 | #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H | ||
62 | |||
63 | #include <inttypes.h> | ||
64 | #include <stdio.h> | ||
65 | |||
66 | #ifdef LV_HAVE_AVX | ||
67 | #include <immintrin.h> | ||
68 | |||
69 | 2 | static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer, | |
70 | double* qBuffer, | ||
71 | const lv_32fc_t* complexVector, | ||
72 | unsigned int num_points) | ||
73 | { | ||
74 | 2 | unsigned int number = 0; | |
75 | |||
76 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
77 | 2 | double* iBufferPtr = iBuffer; | |
78 | 2 | double* qBufferPtr = qBuffer; | |
79 | |||
80 | 2 | const unsigned int quarterPoints = num_points / 4; | |
81 | __m256 cplxValue; | ||
82 | __m128 complexH, complexL, fVal; | ||
83 | __m256d dVal; | ||
84 | |||
85 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
86 | |||
87 | 65534 | cplxValue = _mm256_loadu_ps(complexVectorPtr); | |
88 | 65534 | complexVectorPtr += 8; | |
89 | |||
90 | 65534 | complexH = _mm256_extractf128_ps(cplxValue, 1); | |
91 | 65534 | complexL = _mm256_extractf128_ps(cplxValue, 0); | |
92 | |||
93 | // Arrange in i1i2i3i4 format | ||
94 | 65534 | fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0)); | |
95 | 65534 | dVal = _mm256_cvtps_pd(fVal); | |
96 | _mm256_storeu_pd(iBufferPtr, dVal); | ||
97 | |||
98 | // Arrange in q1q2q3q4 format | ||
99 | 65534 | fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1)); | |
100 | 65534 | dVal = _mm256_cvtps_pd(fVal); | |
101 | _mm256_storeu_pd(qBufferPtr, dVal); | ||
102 | |||
103 | 65534 | iBufferPtr += 4; | |
104 | 65534 | qBufferPtr += 4; | |
105 | } | ||
106 | |||
107 | 2 | number = quarterPoints * 4; | |
108 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
109 | 6 | *iBufferPtr++ = *complexVectorPtr++; | |
110 | 6 | *qBufferPtr++ = *complexVectorPtr++; | |
111 | } | ||
112 | 2 | } | |
113 | #endif /* LV_HAVE_AVX */ | ||
114 | |||
115 | #ifdef LV_HAVE_SSE2 | ||
116 | #include <emmintrin.h> | ||
117 | |||
118 | 2 | static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer, | |
119 | double* qBuffer, | ||
120 | const lv_32fc_t* complexVector, | ||
121 | unsigned int num_points) | ||
122 | { | ||
123 | 2 | unsigned int number = 0; | |
124 | |||
125 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
126 | 2 | double* iBufferPtr = iBuffer; | |
127 | 2 | double* qBufferPtr = qBuffer; | |
128 | |||
129 | 2 | const unsigned int halfPoints = num_points / 2; | |
130 | __m128 cplxValue, fVal; | ||
131 | __m128d dVal; | ||
132 | |||
133 |
2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
|
131072 | for (; number < halfPoints; number++) { |
134 | |||
135 | 131070 | cplxValue = _mm_loadu_ps(complexVectorPtr); | |
136 | 131070 | complexVectorPtr += 4; | |
137 | |||
138 | // Arrange in i1i2i1i2 format | ||
139 | 131070 | fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0)); | |
140 | 131070 | dVal = _mm_cvtps_pd(fVal); | |
141 | _mm_storeu_pd(iBufferPtr, dVal); | ||
142 | |||
143 | // Arrange in q1q2q1q2 format | ||
144 | 131070 | fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1)); | |
145 | 131070 | dVal = _mm_cvtps_pd(fVal); | |
146 | _mm_storeu_pd(qBufferPtr, dVal); | ||
147 | |||
148 | 131070 | iBufferPtr += 2; | |
149 | 131070 | qBufferPtr += 2; | |
150 | } | ||
151 | |||
152 | 2 | number = halfPoints * 2; | |
153 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | for (; number < num_points; number++) { |
154 | 2 | *iBufferPtr++ = *complexVectorPtr++; | |
155 | 2 | *qBufferPtr++ = *complexVectorPtr++; | |
156 | } | ||
157 | 2 | } | |
158 | #endif /* LV_HAVE_SSE2 */ | ||
159 | |||
160 | #ifdef LV_HAVE_GENERIC | ||
161 | |||
162 | 2 | static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer, | |
163 | double* qBuffer, | ||
164 | const lv_32fc_t* complexVector, | ||
165 | unsigned int num_points) | ||
166 | { | ||
167 | 2 | unsigned int number = 0; | |
168 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
169 | 2 | double* iBufferPtr = iBuffer; | |
170 | 2 | double* qBufferPtr = qBuffer; | |
171 | |||
172 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
173 | 262142 | *iBufferPtr++ = (double)*complexVectorPtr++; | |
174 | 262142 | *qBufferPtr++ = (double)*complexVectorPtr++; | |
175 | } | ||
176 | 2 | } | |
177 | #endif /* LV_HAVE_GENERIC */ | ||
178 | |||
179 | #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */ | ||
180 | #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H | ||
181 | #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H | ||
182 | |||
183 | #include <inttypes.h> | ||
184 | #include <stdio.h> | ||
185 | |||
186 | #ifdef LV_HAVE_AVX | ||
187 | #include <immintrin.h> | ||
188 | |||
189 | 2 | static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer, | |
190 | double* qBuffer, | ||
191 | const lv_32fc_t* complexVector, | ||
192 | unsigned int num_points) | ||
193 | { | ||
194 | 2 | unsigned int number = 0; | |
195 | |||
196 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
197 | 2 | double* iBufferPtr = iBuffer; | |
198 | 2 | double* qBufferPtr = qBuffer; | |
199 | |||
200 | 2 | const unsigned int quarterPoints = num_points / 4; | |
201 | __m256 cplxValue; | ||
202 | __m128 complexH, complexL, fVal; | ||
203 | __m256d dVal; | ||
204 | |||
205 |
2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
|
65536 | for (; number < quarterPoints; number++) { |
206 | |||
207 | 65534 | cplxValue = _mm256_load_ps(complexVectorPtr); | |
208 | 65534 | complexVectorPtr += 8; | |
209 | |||
210 | 65534 | complexH = _mm256_extractf128_ps(cplxValue, 1); | |
211 | 65534 | complexL = _mm256_extractf128_ps(cplxValue, 0); | |
212 | |||
213 | // Arrange in i1i2i3i4 format | ||
214 | 65534 | fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0)); | |
215 | 65534 | dVal = _mm256_cvtps_pd(fVal); | |
216 | _mm256_store_pd(iBufferPtr, dVal); | ||
217 | |||
218 | // Arrange in q1q2q3q4 format | ||
219 | 65534 | fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1)); | |
220 | 65534 | dVal = _mm256_cvtps_pd(fVal); | |
221 | _mm256_store_pd(qBufferPtr, dVal); | ||
222 | |||
223 | 65534 | iBufferPtr += 4; | |
224 | 65534 | qBufferPtr += 4; | |
225 | } | ||
226 | |||
227 | 2 | number = quarterPoints * 4; | |
228 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (; number < num_points; number++) { |
229 | 6 | *iBufferPtr++ = *complexVectorPtr++; | |
230 | 6 | *qBufferPtr++ = *complexVectorPtr++; | |
231 | } | ||
232 | 2 | } | |
233 | #endif /* LV_HAVE_AVX */ | ||
234 | |||
235 | #ifdef LV_HAVE_SSE2 | ||
236 | #include <emmintrin.h> | ||
237 | |||
238 | 2 | static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, | |
239 | double* qBuffer, | ||
240 | const lv_32fc_t* complexVector, | ||
241 | unsigned int num_points) | ||
242 | { | ||
243 | 2 | unsigned int number = 0; | |
244 | |||
245 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
246 | 2 | double* iBufferPtr = iBuffer; | |
247 | 2 | double* qBufferPtr = qBuffer; | |
248 | |||
249 | 2 | const unsigned int halfPoints = num_points / 2; | |
250 | __m128 cplxValue, fVal; | ||
251 | __m128d dVal; | ||
252 | |||
253 |
2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
|
131072 | for (; number < halfPoints; number++) { |
254 | |||
255 | 131070 | cplxValue = _mm_load_ps(complexVectorPtr); | |
256 | 131070 | complexVectorPtr += 4; | |
257 | |||
258 | // Arrange in i1i2i1i2 format | ||
259 | 131070 | fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0)); | |
260 | 131070 | dVal = _mm_cvtps_pd(fVal); | |
261 | _mm_store_pd(iBufferPtr, dVal); | ||
262 | |||
263 | // Arrange in q1q2q1q2 format | ||
264 | 131070 | fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1)); | |
265 | 131070 | dVal = _mm_cvtps_pd(fVal); | |
266 | _mm_store_pd(qBufferPtr, dVal); | ||
267 | |||
268 | 131070 | iBufferPtr += 2; | |
269 | 131070 | qBufferPtr += 2; | |
270 | } | ||
271 | |||
272 | 2 | number = halfPoints * 2; | |
273 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
|
4 | for (; number < num_points; number++) { |
274 | 2 | *iBufferPtr++ = *complexVectorPtr++; | |
275 | 2 | *qBufferPtr++ = *complexVectorPtr++; | |
276 | } | ||
277 | 2 | } | |
278 | #endif /* LV_HAVE_SSE2 */ | ||
279 | |||
280 | #ifdef LV_HAVE_GENERIC | ||
281 | |||
282 | 2 | static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, | |
283 | double* qBuffer, | ||
284 | const lv_32fc_t* complexVector, | ||
285 | unsigned int num_points) | ||
286 | { | ||
287 | 2 | unsigned int number = 0; | |
288 | 2 | const float* complexVectorPtr = (float*)complexVector; | |
289 | 2 | double* iBufferPtr = iBuffer; | |
290 | 2 | double* qBufferPtr = qBuffer; | |
291 | |||
292 |
2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
|
262144 | for (number = 0; number < num_points; number++) { |
293 | 262142 | *iBufferPtr++ = (double)*complexVectorPtr++; | |
294 | 262142 | *qBufferPtr++ = (double)*complexVectorPtr++; | |
295 | } | ||
296 | 2 | } | |
297 | #endif /* LV_HAVE_GENERIC */ | ||
298 | |||
299 | #ifdef LV_HAVE_NEONV8 | ||
300 | #include <arm_neon.h> | ||
301 | |||
302 | static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer, | ||
303 | double* qBuffer, | ||
304 | const lv_32fc_t* complexVector, | ||
305 | unsigned int num_points) | ||
306 | { | ||
307 | unsigned int number = 0; | ||
308 | unsigned int half_points = num_points / 2; | ||
309 | const float* complexVectorPtr = (float*)complexVector; | ||
310 | double* iBufferPtr = iBuffer; | ||
311 | double* qBufferPtr = qBuffer; | ||
312 | float32x2x2_t complexInput; | ||
313 | float64x2_t iVal, qVal; | ||
314 | |||
315 | for (number = 0; number < half_points; number++) { | ||
316 | complexInput = vld2_f32(complexVectorPtr); | ||
317 | |||
318 | iVal = vcvt_f64_f32(complexInput.val[0]); | ||
319 | qVal = vcvt_f64_f32(complexInput.val[1]); | ||
320 | |||
321 | vst1q_f64(iBufferPtr, iVal); | ||
322 | vst1q_f64(qBufferPtr, qVal); | ||
323 | |||
324 | complexVectorPtr += 4; | ||
325 | iBufferPtr += 2; | ||
326 | qBufferPtr += 2; | ||
327 | } | ||
328 | |||
329 | for (number = half_points * 2; number < num_points; number++) { | ||
330 | *iBufferPtr++ = (double)*complexVectorPtr++; | ||
331 | *qBufferPtr++ = (double)*complexVectorPtr++; | ||
332 | } | ||
333 | } | ||
334 | #endif /* LV_HAVE_NEONV8 */ | ||
335 | |||
336 | #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */ | ||
337 |