GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_deinterleave_real_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 63 63 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_deinterleave_real_32f
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex floating point vector and returns the real
16 * part (in-phase) of the samples.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_deinterleave_real_32f(float* iBuffer, const lv_32fc_t* complexVector,
21 * unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The output buffer that receives the real (I) part of each sample.
29 *
30 * \b Example
31 * Generate complex numbers around the top half of the unit circle and
32 * extract all of the real parts to a float buffer.
33 * \code
34 * int N = 10;
35 * unsigned int alignment = volk_get_alignment();
36 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37 * float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
38 *
39 * for(unsigned int ii = 0; ii < N; ++ii){
40 * float real = 2.f * ((float)ii / (float)N) - 1.f;
41 * float imag = std::sqrt(1.f - real * real);
42 * in[ii] = lv_cmake(real, imag);
43 * }
44 *
45 * volk_32fc_deinterleave_real_32f(re, in, N);
46 *
47 * printf(" real part\n");
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out(%u) = %+.1f\n", ii, re[ii]);
50 * }
51 *
52 * volk_free(in);
53 * volk_free(re);
54 * \endcode
55 */
56
57 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
58 #define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
59
60 #include <inttypes.h>
61 #include <stdio.h>
62
63 #ifdef LV_HAVE_AVX2
64 #include <immintrin.h>
65
66 2 static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
67 const lv_32fc_t* complexVector,
68 unsigned int num_points)
69 {
70 2 unsigned int number = 0;
71 2 const unsigned int eighthPoints = num_points / 8;
72
73 2 const float* complexVectorPtr = (const float*)complexVector;
74 2 float* iBufferPtr = iBuffer;
75
76 __m256 cplxValue1, cplxValue2;
77 __m256 iValue;
78 2 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
79
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
80
81 32766 cplxValue1 = _mm256_load_ps(complexVectorPtr);
82 32766 complexVectorPtr += 8;
83
84 32766 cplxValue2 = _mm256_load_ps(complexVectorPtr);
85 32766 complexVectorPtr += 8;
86
87 // Keep the real parts; the shuffle works per 128-bit lane, so the permute restores order
88 32766 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
89 32766 iValue = _mm256_permutevar8x32_ps(iValue, idx);
90
91 _mm256_store_ps(iBufferPtr, iValue);
92
93 32766 iBufferPtr += 8;
94 }
95
96 2 number = eighthPoints * 8;
97
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
98 14 *iBufferPtr++ = *complexVectorPtr++;
99 14 complexVectorPtr++;
100 }
101 2 }
102 #endif /* LV_HAVE_AVX2 */
103
104 #ifdef LV_HAVE_SSE
105 #include <xmmintrin.h>
106
107 2 static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
108 const lv_32fc_t* complexVector,
109 unsigned int num_points)
110 {
111 2 unsigned int number = 0;
112 2 const unsigned int quarterPoints = num_points / 4;
113
114 2 const float* complexVectorPtr = (const float*)complexVector;
115 2 float* iBufferPtr = iBuffer;
116
117 __m128 cplxValue1, cplxValue2, iValue;
118
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
119
120 65534 cplxValue1 = _mm_load_ps(complexVectorPtr);
121 65534 complexVectorPtr += 4;
122
123 65534 cplxValue2 = _mm_load_ps(complexVectorPtr);
124 65534 complexVectorPtr += 4;
125
126 // Arrange in i1i2i3i4 format
127 65534 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
128
129 _mm_store_ps(iBufferPtr, iValue);
130
131 65534 iBufferPtr += 4;
132 }
133
134 2 number = quarterPoints * 4;
135
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
136 6 *iBufferPtr++ = *complexVectorPtr++;
137 6 complexVectorPtr++;
138 }
139 2 }
140 #endif /* LV_HAVE_SSE */
141
142
143 #ifdef LV_HAVE_GENERIC
144
145 2 static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
146 const lv_32fc_t* complexVector,
147 unsigned int num_points)
148 {
149 2 unsigned int number = 0;
150 2 const float* complexVectorPtr = (float*)complexVector;
151 2 float* iBufferPtr = iBuffer;
152
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
153 262142 *iBufferPtr++ = *complexVectorPtr++;
154 262142 complexVectorPtr++;
155 }
156 2 }
157 #endif /* LV_HAVE_GENERIC */
158
159
160 #ifdef LV_HAVE_NEON
161 #include <arm_neon.h>
162
163 static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
164 const lv_32fc_t* complexVector,
165 unsigned int num_points)
166 {
167 unsigned int number = 0;
168 unsigned int quarter_points = num_points / 4;
169 const float* complexVectorPtr = (float*)complexVector;
170 float* iBufferPtr = iBuffer;
171 float32x4x2_t complexInput;
172
173 for (number = 0; number < quarter_points; number++) {
174 complexInput = vld2q_f32(complexVectorPtr);
175 vst1q_f32(iBufferPtr, complexInput.val[0]);
176 complexVectorPtr += 8;
177 iBufferPtr += 4;
178 }
179
180 for (number = quarter_points * 4; number < num_points; number++) {
181 *iBufferPtr++ = *complexVectorPtr++;
182 complexVectorPtr++;
183 }
184 }
185 #endif /* LV_HAVE_NEON */
186
187 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
188
189
190 #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
191 #define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
192
193 #include <inttypes.h>
194 #include <stdio.h>
195
196 #ifdef LV_HAVE_AVX2
197 #include <immintrin.h>
198
199 2 static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
200 const lv_32fc_t* complexVector,
201 unsigned int num_points)
202 {
203 2 unsigned int number = 0;
204 2 const unsigned int eighthPoints = num_points / 8;
205
206 2 const float* complexVectorPtr = (const float*)complexVector;
207 2 float* iBufferPtr = iBuffer;
208
209 __m256 cplxValue1, cplxValue2;
210 __m256 iValue;
211 2 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
212
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
213
214 32766 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
215 32766 complexVectorPtr += 8;
216
217 32766 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
218 32766 complexVectorPtr += 8;
219
220 // Keep the real parts; the shuffle works per 128-bit lane, so the permute restores order
221 32766 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
222 32766 iValue = _mm256_permutevar8x32_ps(iValue, idx);
223
224 _mm256_storeu_ps(iBufferPtr, iValue);
225
226 32766 iBufferPtr += 8;
227 }
228
229 2 number = eighthPoints * 8;
230
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
231 14 *iBufferPtr++ = *complexVectorPtr++;
232 14 complexVectorPtr++;
233 }
234 2 }
235 #endif /* LV_HAVE_AVX2 */
236
237 #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */
238