GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_x2_interleave_32fc.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 72 72 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_x2_interleave_32fc
12 *
13 * \b Overview
14 *
15 * Takes input vector iBuffer as the real (inphase) part and input
16 * vector qBuffer as the imag (quadrature) part and combines them into
17 * a complex output vector.
18 *
19 * c[i] = complex(a[i], b[i])
20 *
21 * <b>Dispatcher Prototype</b>
22 * \code
23 * void volk_32f_x2_interleave_32fc(lv_32fc_t* complexVector, const float* iBuffer, const
24 * float* qBuffer, unsigned int num_points) \endcode
25 *
26 * \b Inputs
27 * \li iBuffer: Input vector of samples for the real part.
28 * \li qBuffer: Input vector of samples for the imaginary part.
29 * \li num_points: The number of values in both input vectors.
30 *
31 * \b Outputs
32 * \li complexVector: The output vector of complex numbers.
33 *
34 * \b Example
35 * Generate the top half of the unit circle with real points equally spaced.
36 * \code
37 * int N = 10;
38 * unsigned int alignment = volk_get_alignment();
39 * float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
40 * float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
41 * lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
42 *
43 * for(unsigned int ii = 0; ii < N; ++ii){
44 * real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
45 * imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
46 * }
47 *
48 * volk_32f_x2_interleave_32fc(out, real, imag, N);
49 *
50 * for(unsigned int ii = 0; ii < N; ++ii){
51 * printf("out[%u] = %1.2f + %1.2fj\n", ii, std::real(out[ii]), std::imag(out[ii]));
52 * }
53 *
54 * volk_free(imag);
55 * volk_free(real);
56 * volk_free(out);
57 * \endcode
58 */
59
60 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61 #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
62
63 #include <inttypes.h>
64 #include <stdio.h>
65
66 #ifdef LV_HAVE_AVX
67 #include <immintrin.h>
68
69 2 static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
70 const float* iBuffer,
71 const float* qBuffer,
72 unsigned int num_points)
73 {
74 2 unsigned int number = 0;
75 2 float* complexVectorPtr = (float*)complexVector;
76 2 const float* iBufferPtr = iBuffer;
77 2 const float* qBufferPtr = qBuffer;
78
79 2 const uint64_t eighthPoints = num_points / 8;
80
81 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
82
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
83 32766 iValue = _mm256_load_ps(iBufferPtr);
84 32766 qValue = _mm256_load_ps(qBufferPtr);
85
86 // Interleaves the lower two values of each 128-bit lane of the i and q variables
87 32766 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
88 // Interleaves the upper two values of each 128-bit lane of the i and q variables
89 32766 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
90
91 32766 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
92 _mm256_store_ps(complexVectorPtr, cplxValue);
93 32766 complexVectorPtr += 8;
94
95 32766 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
96 _mm256_store_ps(complexVectorPtr, cplxValue);
97 32766 complexVectorPtr += 8;
98
99 32766 iBufferPtr += 8;
100 32766 qBufferPtr += 8;
101 }
102
103 2 number = eighthPoints * 8;
104
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
105 14 *complexVectorPtr++ = *iBufferPtr++;
106 14 *complexVectorPtr++ = *qBufferPtr++;
107 }
108 2 }
109
110 #endif /* LV_HAVE_AVX */
111
112 #ifdef LV_HAVE_SSE
113 #include <xmmintrin.h>
114
115 2 static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
116 const float* iBuffer,
117 const float* qBuffer,
118 unsigned int num_points)
119 {
120 2 unsigned int number = 0;
121 2 float* complexVectorPtr = (float*)complexVector;
122 2 const float* iBufferPtr = iBuffer;
123 2 const float* qBufferPtr = qBuffer;
124
125 2 const uint64_t quarterPoints = num_points / 4;
126
127 __m128 iValue, qValue, cplxValue;
128
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
129 65534 iValue = _mm_load_ps(iBufferPtr);
130 65534 qValue = _mm_load_ps(qBufferPtr);
131
132 // Interleaves the lower two values in the i and q variables into one buffer
133 65534 cplxValue = _mm_unpacklo_ps(iValue, qValue);
134 _mm_store_ps(complexVectorPtr, cplxValue);
135 65534 complexVectorPtr += 4;
136
137 // Interleaves the upper two values in the i and q variables into one buffer
138 65534 cplxValue = _mm_unpackhi_ps(iValue, qValue);
139 _mm_store_ps(complexVectorPtr, cplxValue);
140 65534 complexVectorPtr += 4;
141
142 65534 iBufferPtr += 4;
143 65534 qBufferPtr += 4;
144 }
145
146 2 number = quarterPoints * 4;
147
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
148 6 *complexVectorPtr++ = *iBufferPtr++;
149 6 *complexVectorPtr++ = *qBufferPtr++;
150 }
151 2 }
152 #endif /* LV_HAVE_SSE */
153
154
155 #ifdef LV_HAVE_NEON
156 #include <arm_neon.h>
157
158 static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
159 const float* iBuffer,
160 const float* qBuffer,
161 unsigned int num_points)
162 {
163 unsigned int quarter_points = num_points / 4;
164 unsigned int number;
165 float* complexVectorPtr = (float*)complexVector;
166
167 float32x4x2_t complex_vec;
168 for (number = 0; number < quarter_points; ++number) {
169 complex_vec.val[0] = vld1q_f32(iBuffer);
170 complex_vec.val[1] = vld1q_f32(qBuffer);
171 vst2q_f32(complexVectorPtr, complex_vec);
172 iBuffer += 4;
173 qBuffer += 4;
174 complexVectorPtr += 8;
175 }
176
177 for (number = quarter_points * 4; number < num_points; ++number) {
178 *complexVectorPtr++ = *iBuffer++;
179 *complexVectorPtr++ = *qBuffer++;
180 }
181 }
182 #endif /* LV_HAVE_NEON */
183
184
185 #ifdef LV_HAVE_GENERIC
186
187 2 static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
188 const float* iBuffer,
189 const float* qBuffer,
190 unsigned int num_points)
191 {
192 2 float* complexVectorPtr = (float*)complexVector;
193 2 const float* iBufferPtr = iBuffer;
194 2 const float* qBufferPtr = qBuffer;
195 unsigned int number;
196
197
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
198 262142 *complexVectorPtr++ = *iBufferPtr++;
199 262142 *complexVectorPtr++ = *qBufferPtr++;
200 }
201 2 }
202 #endif /* LV_HAVE_GENERIC */
203
204
205 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
206
207 #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
208 #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
209
210 #include <inttypes.h>
211 #include <stdio.h>
212
213 #ifdef LV_HAVE_AVX
214 #include <immintrin.h>
215
216 2 static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
217 const float* iBuffer,
218 const float* qBuffer,
219 unsigned int num_points)
220 {
221 2 unsigned int number = 0;
222 2 float* complexVectorPtr = (float*)complexVector;
223 2 const float* iBufferPtr = iBuffer;
224 2 const float* qBufferPtr = qBuffer;
225
226 2 const uint64_t eighthPoints = num_points / 8;
227
228 __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
229
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
230 32766 iValue = _mm256_loadu_ps(iBufferPtr);
231 32766 qValue = _mm256_loadu_ps(qBufferPtr);
232
233 // Interleaves the lower two values of each 128-bit lane of the i and q variables
234 32766 cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
235 // Interleaves the upper two values of each 128-bit lane of the i and q variables
236 32766 cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
237
238 32766 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
239 _mm256_storeu_ps(complexVectorPtr, cplxValue);
240 32766 complexVectorPtr += 8;
241
242 32766 cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
243 _mm256_storeu_ps(complexVectorPtr, cplxValue);
244 32766 complexVectorPtr += 8;
245
246 32766 iBufferPtr += 8;
247 32766 qBufferPtr += 8;
248 }
249
250 2 number = eighthPoints * 8;
251
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
252 14 *complexVectorPtr++ = *iBufferPtr++;
253 14 *complexVectorPtr++ = *qBufferPtr++;
254 }
255 2 }
256 #endif /* LV_HAVE_AVX */
257
258 #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
259