GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_deinterleave_64f_x2.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 102 102 100.0%
Functions: 6 6 100.0%
Branches: 20 20 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_deinterleave_64f_x2
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex floating point vector into I & Q vector
16 * data. The output vectors are converted to doubles.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_deinterleave_64f_x2(double* iBuffer, double* qBuffer, const
21 * lv_32fc_t* complexVector, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 * \li qBuffer: The Q buffer output data.
30 *
31 * \b Example
32 * Generate complex numbers around the top half of the unit circle and
33 * deinterleave into real and imaginary double buffers.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38 * double* re = (double*)volk_malloc(sizeof(double)*N, alignment);
39 * double* im = (double*)volk_malloc(sizeof(double)*N, alignment);
40 *
41 * for(unsigned int ii = 0; ii < N; ++ii){
42 * float real = 2.f * ((float)ii / (float)N) - 1.f;
43 * float imag = std::sqrt(1.f - real * real);
44 * in[ii] = lv_cmake(real, imag);
45 * }
46 *
47 * volk_32fc_deinterleave_64f_x2(re, im, in, N);
48 *
49 * printf(" re | im\n");
50 * for(unsigned int ii = 0; ii < N; ++ii){
51 * printf("out(%u) = %+.1g | %+.1g\n", ii, re[ii], im[ii]);
52 * }
53 *
54 * volk_free(in);
55 * volk_free(re);
56 * volk_free(im);
57 * \endcode
58 */
59
60 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
61 #define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
62
63 #include <inttypes.h>
64 #include <stdio.h>
65
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68
69 2 static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer,
70 double* qBuffer,
71 const lv_32fc_t* complexVector,
72 unsigned int num_points)
73 {
74 2 unsigned int number = 0;
75
76 2 const float* complexVectorPtr = (float*)complexVector;
77 2 double* iBufferPtr = iBuffer;
78 2 double* qBufferPtr = qBuffer;
79
80 2 const unsigned int quarterPoints = num_points / 4;
81 __m256 cplxValue;
82 __m128 complexH, complexL, fVal;
83 __m256d dVal;
84
85
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
86
87 65534 cplxValue = _mm256_loadu_ps(complexVectorPtr);
88 65534 complexVectorPtr += 8;
89
90 65534 complexH = _mm256_extractf128_ps(cplxValue, 1);
91 65534 complexL = _mm256_extractf128_ps(cplxValue, 0);
92
93 // Arrange in i1i2i3i4 format
94 65534 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
95 65534 dVal = _mm256_cvtps_pd(fVal);
96 _mm256_storeu_pd(iBufferPtr, dVal);
97
98 // Arrange in q1q2q3q4 format
99 65534 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
100 65534 dVal = _mm256_cvtps_pd(fVal);
101 _mm256_storeu_pd(qBufferPtr, dVal);
102
103 65534 iBufferPtr += 4;
104 65534 qBufferPtr += 4;
105 }
106
107 2 number = quarterPoints * 4;
108
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
109 6 *iBufferPtr++ = *complexVectorPtr++;
110 6 *qBufferPtr++ = *complexVectorPtr++;
111 }
112 2 }
113 #endif /* LV_HAVE_AVX */
114
115 #ifdef LV_HAVE_SSE2
116 #include <emmintrin.h>
117
118 2 static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer,
119 double* qBuffer,
120 const lv_32fc_t* complexVector,
121 unsigned int num_points)
122 {
123 2 unsigned int number = 0;
124
125 2 const float* complexVectorPtr = (float*)complexVector;
126 2 double* iBufferPtr = iBuffer;
127 2 double* qBufferPtr = qBuffer;
128
129 2 const unsigned int halfPoints = num_points / 2;
130 __m128 cplxValue, fVal;
131 __m128d dVal;
132
133
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
134
135 131070 cplxValue = _mm_loadu_ps(complexVectorPtr);
136 131070 complexVectorPtr += 4;
137
138 // Arrange in i1i2i1i2 format
139 131070 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
140 131070 dVal = _mm_cvtps_pd(fVal);
141 _mm_storeu_pd(iBufferPtr, dVal);
142
143 // Arrange in q1q2q1q2 format
144 131070 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
145 131070 dVal = _mm_cvtps_pd(fVal);
146 _mm_storeu_pd(qBufferPtr, dVal);
147
148 131070 iBufferPtr += 2;
149 131070 qBufferPtr += 2;
150 }
151
152 2 number = halfPoints * 2;
153
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
154 2 *iBufferPtr++ = *complexVectorPtr++;
155 2 *qBufferPtr++ = *complexVectorPtr++;
156 }
157 2 }
158 #endif /* LV_HAVE_SSE2 */
159
160 #ifdef LV_HAVE_GENERIC
161
162 2 static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer,
163 double* qBuffer,
164 const lv_32fc_t* complexVector,
165 unsigned int num_points)
166 {
167 2 unsigned int number = 0;
168 2 const float* complexVectorPtr = (float*)complexVector;
169 2 double* iBufferPtr = iBuffer;
170 2 double* qBufferPtr = qBuffer;
171
172
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
173 262142 *iBufferPtr++ = (double)*complexVectorPtr++;
174 262142 *qBufferPtr++ = (double)*complexVectorPtr++;
175 }
176 2 }
177 #endif /* LV_HAVE_GENERIC */
178
179 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
180 #ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
181 #define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
182
183 #include <inttypes.h>
184 #include <stdio.h>
185
186 #ifdef LV_HAVE_AVX
187 #include <immintrin.h>
188
189 2 static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer,
190 double* qBuffer,
191 const lv_32fc_t* complexVector,
192 unsigned int num_points)
193 {
194 2 unsigned int number = 0;
195
196 2 const float* complexVectorPtr = (float*)complexVector;
197 2 double* iBufferPtr = iBuffer;
198 2 double* qBufferPtr = qBuffer;
199
200 2 const unsigned int quarterPoints = num_points / 4;
201 __m256 cplxValue;
202 __m128 complexH, complexL, fVal;
203 __m256d dVal;
204
205
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
206
207 65534 cplxValue = _mm256_load_ps(complexVectorPtr);
208 65534 complexVectorPtr += 8;
209
210 65534 complexH = _mm256_extractf128_ps(cplxValue, 1);
211 65534 complexL = _mm256_extractf128_ps(cplxValue, 0);
212
213 // Arrange in i1i2i3i4 format
214 65534 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2, 0, 2, 0));
215 65534 dVal = _mm256_cvtps_pd(fVal);
216 _mm256_store_pd(iBufferPtr, dVal);
217
218 // Arrange in q1q2q3q4 format
219 65534 fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3, 1, 3, 1));
220 65534 dVal = _mm256_cvtps_pd(fVal);
221 _mm256_store_pd(qBufferPtr, dVal);
222
223 65534 iBufferPtr += 4;
224 65534 qBufferPtr += 4;
225 }
226
227 2 number = quarterPoints * 4;
228
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
229 6 *iBufferPtr++ = *complexVectorPtr++;
230 6 *qBufferPtr++ = *complexVectorPtr++;
231 }
232 2 }
233 #endif /* LV_HAVE_AVX */
234
235 #ifdef LV_HAVE_SSE2
236 #include <emmintrin.h>
237
238 2 static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer,
239 double* qBuffer,
240 const lv_32fc_t* complexVector,
241 unsigned int num_points)
242 {
243 2 unsigned int number = 0;
244
245 2 const float* complexVectorPtr = (float*)complexVector;
246 2 double* iBufferPtr = iBuffer;
247 2 double* qBufferPtr = qBuffer;
248
249 2 const unsigned int halfPoints = num_points / 2;
250 __m128 cplxValue, fVal;
251 __m128d dVal;
252
253
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
254
255 131070 cplxValue = _mm_load_ps(complexVectorPtr);
256 131070 complexVectorPtr += 4;
257
258 // Arrange in i1i2i1i2 format
259 131070 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
260 131070 dVal = _mm_cvtps_pd(fVal);
261 _mm_store_pd(iBufferPtr, dVal);
262
263 // Arrange in q1q2q1q2 format
264 131070 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3, 1, 3, 1));
265 131070 dVal = _mm_cvtps_pd(fVal);
266 _mm_store_pd(qBufferPtr, dVal);
267
268 131070 iBufferPtr += 2;
269 131070 qBufferPtr += 2;
270 }
271
272 2 number = halfPoints * 2;
273
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
274 2 *iBufferPtr++ = *complexVectorPtr++;
275 2 *qBufferPtr++ = *complexVectorPtr++;
276 }
277 2 }
278 #endif /* LV_HAVE_SSE2 */
279
280 #ifdef LV_HAVE_GENERIC
281
282 2 static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer,
283 double* qBuffer,
284 const lv_32fc_t* complexVector,
285 unsigned int num_points)
286 {
287 2 unsigned int number = 0;
288 2 const float* complexVectorPtr = (float*)complexVector;
289 2 double* iBufferPtr = iBuffer;
290 2 double* qBufferPtr = qBuffer;
291
292
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
293 262142 *iBufferPtr++ = (double)*complexVectorPtr++;
294 262142 *qBufferPtr++ = (double)*complexVectorPtr++;
295 }
296 2 }
297 #endif /* LV_HAVE_GENERIC */
298
299 #ifdef LV_HAVE_NEONV8
300 #include <arm_neon.h>
301
302 static inline void volk_32fc_deinterleave_64f_x2_neon(double* iBuffer,
303 double* qBuffer,
304 const lv_32fc_t* complexVector,
305 unsigned int num_points)
306 {
307 unsigned int number = 0;
308 unsigned int half_points = num_points / 2;
309 const float* complexVectorPtr = (float*)complexVector;
310 double* iBufferPtr = iBuffer;
311 double* qBufferPtr = qBuffer;
312 float32x2x2_t complexInput;
313 float64x2_t iVal, qVal;
314
315 for (number = 0; number < half_points; number++) {
316 complexInput = vld2_f32(complexVectorPtr);
317
318 iVal = vcvt_f64_f32(complexInput.val[0]);
319 qVal = vcvt_f64_f32(complexInput.val[1]);
320
321 vst1q_f64(iBufferPtr, iVal);
322 vst1q_f64(qBufferPtr, qVal);
323
324 complexVectorPtr += 4;
325 iBufferPtr += 2;
326 qBufferPtr += 2;
327 }
328
329 for (number = half_points * 2; number < num_points; number++) {
330 *iBufferPtr++ = (double)*complexVectorPtr++;
331 *qBufferPtr++ = (double)*complexVectorPtr++;
332 }
333 }
334 #endif /* LV_HAVE_NEONV8 */
335
336 #endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
337