GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_deinterleave_32f_x2.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 72 72 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_deinterleave_32f_x2
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex floating point vector into I & Q vector
16 * data.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_deinterleave_32f_x2(float* iBuffer, float* qBuffer, const lv_32fc_t*
21 * complexVector, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 * \li qBuffer: The Q buffer output data.
30 *
31 * \b Example
32 * Generate complex numbers around the top half of the unit circle and
33 * deinterleave them into real and imaginary buffers.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38 * float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
39 * float* im = (float*)volk_malloc(sizeof(float)*N, alignment);
40 *
41 * for(unsigned int ii = 0; ii < N; ++ii){
42 * float real = 2.f * ((float)ii / (float)N) - 1.f;
43 * float imag = std::sqrt(1.f - real * real);
44 * in[ii] = lv_cmake(real, imag);
45 * }
46 *
47 * volk_32fc_deinterleave_32f_x2(re, im, in, N);
48 *
49 * printf(" re | im\n");
50 * for(unsigned int ii = 0; ii < N; ++ii){
51 * printf("out(%u) = %+.1f | %+.1f\n", ii, re[ii], im[ii]);
52 * }
53 *
54 * volk_free(in);
55 * volk_free(re);
56 * volk_free(im);
57 * \endcode
58 */
59
60 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
61 #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
62
63 #include <inttypes.h>
64 #include <stdio.h>
65
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68 2 static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
69 float* qBuffer,
70 const lv_32fc_t* complexVector,
71 unsigned int num_points)
72 {
73 2 const float* complexVectorPtr = (float*)complexVector;
74 2 float* iBufferPtr = iBuffer;
75 2 float* qBufferPtr = qBuffer;
76
77 2 unsigned int number = 0;
78 // Number of full 8-point blocks handled by the AVX loop
79 2 const unsigned int eighthPoints = num_points / 8;
80 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
81
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
82 32766 cplxValue1 = _mm256_load_ps(complexVectorPtr);
83 32766 complexVectorPtr += 8;
84
85 32766 cplxValue2 = _mm256_load_ps(complexVectorPtr);
86 32766 complexVectorPtr += 8;
87
88 32766 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
89 32766 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
90
91 // Arrange in i1i2i3i4i5i6i7i8 format
92 32766 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
93 // Arrange in q1q2q3q4q5q6q7q8 format
94 32766 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
95
96 _mm256_store_ps(iBufferPtr, iValue);
97 _mm256_store_ps(qBufferPtr, qValue);
98
99 32766 iBufferPtr += 8;
100 32766 qBufferPtr += 8;
101 }
102
103 2 number = eighthPoints * 8;
104
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
105 14 *iBufferPtr++ = *complexVectorPtr++;
106 14 *qBufferPtr++ = *complexVectorPtr++;
107 }
108 2 }
109 #endif /* LV_HAVE_AVX */
110
111 #ifdef LV_HAVE_SSE
112 #include <xmmintrin.h>
113
114 2 static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
115 float* qBuffer,
116 const lv_32fc_t* complexVector,
117 unsigned int num_points)
118 {
119 2 const float* complexVectorPtr = (float*)complexVector;
120 2 float* iBufferPtr = iBuffer;
121 2 float* qBufferPtr = qBuffer;
122
123 2 unsigned int number = 0;
124 2 const unsigned int quarterPoints = num_points / 4;
125 __m128 cplxValue1, cplxValue2, iValue, qValue;
126
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
127 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
128 65534 complexVectorPtr += 4;
129
130 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
131 65534 complexVectorPtr += 4;
132
133 // Arrange in i1i2i3i4 format
134 65534 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
135 // Arrange in q1q2q3q4 format
136 65534 qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
137
138 _mm_store_ps(iBufferPtr, iValue);
139 _mm_store_ps(qBufferPtr, qValue);
140
141 65534 iBufferPtr += 4;
142 65534 qBufferPtr += 4;
143 }
144
145 2 number = quarterPoints * 4;
146
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
147 6 *iBufferPtr++ = *complexVectorPtr++;
148 6 *qBufferPtr++ = *complexVectorPtr++;
149 }
150 2 }
151 #endif /* LV_HAVE_SSE */
152
153
154 #ifdef LV_HAVE_NEON
155 #include <arm_neon.h>
156
157 static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
158 float* qBuffer,
159 const lv_32fc_t* complexVector,
160 unsigned int num_points)
161 {
162 unsigned int number = 0;
163 unsigned int quarter_points = num_points / 4;
164 const float* complexVectorPtr = (float*)complexVector;
165 float* iBufferPtr = iBuffer;
166 float* qBufferPtr = qBuffer;
167 float32x4x2_t complexInput;
168
169 for (number = 0; number < quarter_points; number++) {
170 complexInput = vld2q_f32(complexVectorPtr);
171 vst1q_f32(iBufferPtr, complexInput.val[0]);
172 vst1q_f32(qBufferPtr, complexInput.val[1]);
173 complexVectorPtr += 8;
174 iBufferPtr += 4;
175 qBufferPtr += 4;
176 }
177
178 for (number = quarter_points * 4; number < num_points; number++) {
179 *iBufferPtr++ = *complexVectorPtr++;
180 *qBufferPtr++ = *complexVectorPtr++;
181 }
182 }
183 #endif /* LV_HAVE_NEON */
184
185
186 #ifdef LV_HAVE_GENERIC
187
188 2 static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
189 float* qBuffer,
190 const lv_32fc_t* complexVector,
191 unsigned int num_points)
192 {
193 2 const float* complexVectorPtr = (float*)complexVector;
194 2 float* iBufferPtr = iBuffer;
195 2 float* qBufferPtr = qBuffer;
196 unsigned int number;
197
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
198 262142 *iBufferPtr++ = *complexVectorPtr++;
199 262142 *qBufferPtr++ = *complexVectorPtr++;
200 }
201 2 }
202 #endif /* LV_HAVE_GENERIC */
203
204 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
205
206
207 #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
208 #define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
209
210 #include <inttypes.h>
211 #include <stdio.h>
212
213 #ifdef LV_HAVE_AVX
214 #include <immintrin.h>
215 2 static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
216 float* qBuffer,
217 const lv_32fc_t* complexVector,
218 unsigned int num_points)
219 {
220 2 const float* complexVectorPtr = (float*)complexVector;
221 2 float* iBufferPtr = iBuffer;
222 2 float* qBufferPtr = qBuffer;
223
224 2 unsigned int number = 0;
225 // Number of full 8-point blocks handled by the AVX loop
226 2 const unsigned int eighthPoints = num_points / 8;
227 __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
228
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
229 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
230 32766 complexVectorPtr += 8;
231
232 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
233 32766 complexVectorPtr += 8;
234
235 32766 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
236 32766 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
237
238 // Arrange in i1i2i3i4i5i6i7i8 format
239 32766 iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
240 // Arrange in q1q2q3q4q5q6q7q8 format
241 32766 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
242
243 _mm256_storeu_ps(iBufferPtr, iValue);
244 _mm256_storeu_ps(qBufferPtr, qValue);
245
246 32766 iBufferPtr += 8;
247 32766 qBufferPtr += 8;
248 }
249
250 2 number = eighthPoints * 8;
251
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
252 14 *iBufferPtr++ = *complexVectorPtr++;
253 14 *qBufferPtr++ = *complexVectorPtr++;
254 }
255 2 }
256 #endif /* LV_HAVE_AVX */
257 #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
258