GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_deinterleave_real_64f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 60 60 100.0%
Functions: 4 4 100.0%
Branches: 14 14 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_deinterleave_real_64f
12 *
13 * \b Overview
14 *
15 * Deinterleaves the complex floating point vector and returns the real
16 * part (in-phase) of the samples, converted to doubles.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_deinterleave_real_64f(double* iBuffer, const lv_32fc_t*
21 * complexVector, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The complex input vector.
25 * \li num_points: The number of complex data values to be deinterleaved.
26 *
27 * \b Outputs
28 * \li iBuffer: The I buffer output data.
29 *
30 * \b Example
31 *
32 * Generate complex numbers around the top half of the unit circle and
33 * extract all of the real parts to a double buffer.
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38 * double* re = (double*)volk_malloc(sizeof(double)*N, alignment);
39 *
40 * for(unsigned int ii = 0; ii < N; ++ii){
41 * float real = 2.f * ((float)ii / (float)N) - 1.f;
42 * float imag = std::sqrt(1.f - real * real);
43 * in[ii] = lv_cmake(real, imag);
44 * }
45 *
46 * volk_32fc_deinterleave_real_64f(re, in, N);
47 *
48 * printf(" real part\n");
49 * for(unsigned int ii = 0; ii < N; ++ii){
50 * printf("out(%u) = %+.1g\n", ii, re[ii]);
51 * }
52 *
53 * volk_free(in);
54 * volk_free(re);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
59 #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
60
61 #include <inttypes.h>
62 #include <stdio.h>
63
64 #ifdef LV_HAVE_AVX2
65 #include <immintrin.h>
66
67 2 static inline void volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer,
68 const lv_32fc_t* complexVector,
69 unsigned int num_points)
70 {
71 2 unsigned int number = 0;
72
73 2 const float* complexVectorPtr = (float*)complexVector;
74 2 double* iBufferPtr = iBuffer;
75
76 2 const unsigned int quarterPoints = num_points / 4;
77 __m256 cplxValue;
78 __m128 fVal;
79 __m256d dVal;
80 2 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
81
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
82
83 65534 cplxValue = _mm256_load_ps(complexVectorPtr);
84 65534 complexVectorPtr += 8;
85
86 // Gather the four real parts (i1 i2 i3 i4) into the low 128-bit lane
87 65534 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
88 65534 fVal = _mm256_extractf128_ps(cplxValue, 0);
89 65534 dVal = _mm256_cvtps_pd(fVal);
90 _mm256_store_pd(iBufferPtr, dVal);
91
92 65534 iBufferPtr += 4;
93 }
94
95 2 number = quarterPoints * 4;
96
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
97 6 *iBufferPtr++ = (double)*complexVectorPtr++;
98 6 complexVectorPtr++;
99 }
100 2 }
101 #endif /* LV_HAVE_AVX2 */
102
103 #ifdef LV_HAVE_SSE2
104 #include <emmintrin.h>
105
106 2 static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer,
107 const lv_32fc_t* complexVector,
108 unsigned int num_points)
109 {
110 2 unsigned int number = 0;
111
112 2 const float* complexVectorPtr = (float*)complexVector;
113 2 double* iBufferPtr = iBuffer;
114
115 2 const unsigned int halfPoints = num_points / 2;
116 __m128 cplxValue, fVal;
117 __m128d dVal;
118
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
119
120 131070 cplxValue = _mm_load_ps(complexVectorPtr);
121 131070 complexVectorPtr += 4;
122
123 // Arrange in i1i2i1i2 format
124 131070 fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
125 131070 dVal = _mm_cvtps_pd(fVal);
126 _mm_store_pd(iBufferPtr, dVal);
127
128 131070 iBufferPtr += 2;
129 }
130
131 2 number = halfPoints * 2;
132
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
133 2 *iBufferPtr++ = (double)*complexVectorPtr++;
134 2 complexVectorPtr++;
135 }
136 2 }
137 #endif /* LV_HAVE_SSE2 */
138
139 #ifdef LV_HAVE_GENERIC
140
141 2 static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer,
142 const lv_32fc_t* complexVector,
143 unsigned int num_points)
144 {
145 2 unsigned int number = 0;
146 2 const float* complexVectorPtr = (float*)complexVector;
147 2 double* iBufferPtr = iBuffer;
148
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
149 262142 *iBufferPtr++ = (double)*complexVectorPtr++;
150 262142 complexVectorPtr++;
151 }
152 2 }
153 #endif /* LV_HAVE_GENERIC */
154
155 #ifdef LV_HAVE_NEONV8
156 #include <arm_neon.h>
157
158 static inline void volk_32fc_deinterleave_real_64f_neon(double* iBuffer,
159 const lv_32fc_t* complexVector,
160 unsigned int num_points)
161 {
162 unsigned int number = 0;
163 unsigned int quarter_points = num_points / 4;
164 const float* complexVectorPtr = (float*)complexVector;
165 double* iBufferPtr = iBuffer;
166 float32x2x4_t complexInput;
167 float64x2_t iVal1;
168 float64x2_t iVal2;
169 float64x2x2_t iVal;
170
171 for (number = 0; number < quarter_points; number++) {
172 // Load data into register
173 complexInput = vld4_f32(complexVectorPtr);
174
175 // Perform single to double precision conversion
176 iVal1 = vcvt_f64_f32(complexInput.val[0]);
177 iVal2 = vcvt_f64_f32(complexInput.val[2]);
178 iVal.val[0] = iVal1;
179 iVal.val[1] = iVal2;
180
181 // Store results into memory buffer
182 vst2q_f64(iBufferPtr, iVal);
183
184 // Update pointers
185 iBufferPtr += 4;
186 complexVectorPtr += 8;
187 }
188
189 for (number = quarter_points * 4; number < num_points; number++) {
190 *iBufferPtr++ = (double)*complexVectorPtr++;
191 complexVectorPtr++;
192 }
193 }
194 #endif /* LV_HAVE_NEONV8 */
195
196 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
197
198 #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
199 #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
200
201 #include <inttypes.h>
202 #include <stdio.h>
203
204 #ifdef LV_HAVE_AVX2
205 #include <immintrin.h>
206
207 2 static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer,
208 const lv_32fc_t* complexVector,
209 unsigned int num_points)
210 {
211 2 unsigned int number = 0;
212
213 2 const float* complexVectorPtr = (float*)complexVector;
214 2 double* iBufferPtr = iBuffer;
215
216 2 const unsigned int quarterPoints = num_points / 4;
217 __m256 cplxValue;
218 __m128 fVal;
219 __m256d dVal;
220 2 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
221
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
222
223 65534 cplxValue = _mm256_loadu_ps(complexVectorPtr);
224 65534 complexVectorPtr += 8;
225
226 // Gather the four real parts (i1 i2 i3 i4) into the low 128-bit lane
227 65534 cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
228 65534 fVal = _mm256_extractf128_ps(cplxValue, 0);
229 65534 dVal = _mm256_cvtps_pd(fVal);
230 _mm256_storeu_pd(iBufferPtr, dVal);
231
232 65534 iBufferPtr += 4;
233 }
234
235 2 number = quarterPoints * 4;
236
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
237 6 *iBufferPtr++ = (double)*complexVectorPtr++;
238 6 complexVectorPtr++;
239 }
240 2 }
241 #endif /* LV_HAVE_AVX2 */
242
243 #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
244