GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_s32f_atan2_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 29 29 100.0%
Functions: 3 3 100.0%
Branches: 6 6 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_s32f_atan2_32f
12 *
13 * \b Overview
14 *
15 * Computes atan2(imag, real), i.e. the phase angle, of each sample in a complex
16 * vector and divides the result by a normalization factor.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32fc_s32f_atan2_32f(float* outputVector, const lv_32fc_t* complexVector,
21 * const float normalizeFactor, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexVector: The byte-aligned input vector containing interleaved IQ data (I = cos,
25 * Q = sin). \li normalizeFactor: The atan2 results are divided by this normalization
26 * factor. \li num_points: The number of complex values in \p complexVector.
27 *
28 * \b Outputs
29 * \li outputVector: The vector where the results will be stored.
30 *
31 * \b Example
32 * Calculate the arctangent of points around the unit circle.
33 * \code
34 * int N = 10;
35 * unsigned int alignment = volk_get_alignment();
36 * lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37 * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
38 * float scale = 1.f; // normalize by 1, i.e. leave the phase in radians
39 *
40 * for(unsigned int ii = 0; ii < N/2; ++ii){
41 * // Generate points around the unit circle
42 * float real = -4.f * ((float)ii / (float)N) + 1.f;
43 * float imag = std::sqrt(1.f - real * real);
44 * in[ii] = lv_cmake(real, imag);
45 * in[ii+N/2] = lv_cmake(-real, -imag);
46 * }
47 *
48 * volk_32fc_s32f_atan2_32f(out, in, scale, N);
49 *
50 * for(unsigned int ii = 0; ii < N; ++ii){
51 * printf("atan2(%1.2f, %1.2f) = %1.2f\n",
52 * lv_cimag(in[ii]), lv_creal(in[ii]), out[ii]);
53 * }
54 *
55 * volk_free(in);
56 * volk_free(out);
57 * \endcode
58 */
59
60
61 #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
62 #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
63
64 #include <inttypes.h>
65 #include <math.h>
66 #include <stdio.h>
67
68 #ifdef LV_HAVE_SSE4_1
69 #include <smmintrin.h>
70
71 #ifdef LV_HAVE_LIB_SIMDMATH
72 #include <simdmath.h>
73 #endif /* LV_HAVE_LIB_SIMDMATH */
74
/*
 * Aligned SSE4.1 kernel: for each of the num_points complex samples in
 * complexVector, writes atan2f(imag, real) * (1 / normalizeFactor) to
 * outputVector.
 *
 * With LV_HAVE_LIB_SIMDMATH defined, samples are processed four at a time
 * using atan2f4 and the aligned _mm_load_ps/_mm_store_ps intrinsics, so both
 * buffers must be 16-byte aligned on that path; the scalar loop at the end
 * then handles the remaining num_points % 4 samples. Without it, the scalar
 * loop processes every sample.
 *
 * \param outputVector    Receives one float per input sample.
 * \param complexVector   Input samples, read as interleaved (real, imag) floats.
 * \param normalizeFactor Each result is multiplied by 1.0 / normalizeFactor.
 * \param num_points      Number of complex samples to process.
 */
75 2 static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
76 const lv_32fc_t* complexVector,
77 const float normalizeFactor,
78 unsigned int num_points)
79 {
80 2 const float* complexVectorPtr = (float*)complexVector; // walk the complex input as a flat float array
81 2 float* outPtr = outputVector;
82
83 2 unsigned int number = 0;
84 2 const float invNormalizeFactor = 1.0 / normalizeFactor; // reciprocal computed once, multiplied into every result
85
86 #ifdef LV_HAVE_LIB_SIMDMATH
87 const unsigned int quarterPoints = num_points / 4;
88 __m128 testVector = _mm_set_ps1(2 * M_PI);
89 __m128 correctVector = _mm_set_ps1(M_PI);
90 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
91 __m128 phase;
92 __m128 complex1, complex2, iValue, qValue;
93 __m128 keepMask;
94
95 for (; number < quarterPoints; number++) {
96 // Load two vectors of interleaved IQ data (four complex samples):
97 complex1 = _mm_load_ps(complexVectorPtr);
98 complexVectorPtr += 4;
99 complex2 = _mm_load_ps(complexVectorPtr);
100 complexVectorPtr += 4;
101 // Deinterleave: even float positions hold I (real), odd positions hold Q (imag):
102 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
103 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
104 // Four-wide atan2(Q, I) to get the phase:
105 phase = atan2f4(qValue, iValue);
106 // Original author's note: when Q = 0 and I < 0, atan2f4 returns 2*pi instead of pi.
107 // Flag the lanes that differ from 2*pi; the blend keeps those and substitutes pi elsewhere:
108 keepMask = _mm_cmpneq_ps(phase, testVector);
109 phase = _mm_blendv_ps(correctVector, phase, keepMask);
110 // End of the 2*pi correction; apply the normalization and store four results:
111 phase = _mm_mul_ps(phase, vNormalizeFactor);
112 _mm_store_ps((float*)outPtr, phase);
113 outPtr += 4;
114 }
115 number = quarterPoints * 4; // scalar loop resumes at the first sample the vector loop did not cover
116 #endif /* LV_HAVE_LIB_SIMDMATH */
117
118
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
119 262142 const float real = *complexVectorPtr++;
120 262142 const float imag = *complexVectorPtr++;
121 262142 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
122 }
123 2 }
124 #endif /* LV_HAVE_SSE4_1 */
125
126
127 #ifdef LV_HAVE_SSE
128 #include <xmmintrin.h>
129
130 #ifdef LV_HAVE_LIB_SIMDMATH
131 #include <simdmath.h>
132 #endif /* LV_HAVE_LIB_SIMDMATH */
133
/*
 * Aligned SSE kernel: for each of the num_points complex samples in
 * complexVector, writes atan2f(imag, real) * (1 / normalizeFactor) to
 * outputVector.
 *
 * With LV_HAVE_LIB_SIMDMATH defined, samples are processed four at a time
 * using atan2f4 and the aligned _mm_load_ps/_mm_store_ps intrinsics, so both
 * buffers must be 16-byte aligned on that path; the scalar loop at the end
 * then handles the remaining num_points % 4 samples. Without it, the scalar
 * loop processes every sample. The per-lane select is built from
 * and/andnot/or instead of a blend intrinsic.
 *
 * \param outputVector    Receives one float per input sample.
 * \param complexVector   Input samples, read as interleaved (real, imag) floats.
 * \param normalizeFactor Each result is multiplied by 1.0 / normalizeFactor.
 * \param num_points      Number of complex samples to process.
 */
134 2 static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
135 const lv_32fc_t* complexVector,
136 const float normalizeFactor,
137 unsigned int num_points)
138 {
139 2 const float* complexVectorPtr = (float*)complexVector; // walk the complex input as a flat float array
140 2 float* outPtr = outputVector;
141
142 2 unsigned int number = 0;
143 2 const float invNormalizeFactor = 1.0 / normalizeFactor; // reciprocal computed once, multiplied into every result
144
145 #ifdef LV_HAVE_LIB_SIMDMATH
146 const unsigned int quarterPoints = num_points / 4;
147 __m128 testVector = _mm_set_ps1(2 * M_PI);
148 __m128 correctVector = _mm_set_ps1(M_PI);
149 __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
150 __m128 phase;
151 __m128 complex1, complex2, iValue, qValue;
152 __m128 mask;
153 __m128 keepMask;
154
155 for (; number < quarterPoints; number++) {
156 // Load two vectors of interleaved IQ data (four complex samples):
157 complex1 = _mm_load_ps(complexVectorPtr);
158 complexVectorPtr += 4;
159 complex2 = _mm_load_ps(complexVectorPtr);
160 complexVectorPtr += 4;
161 // Deinterleave: even float positions hold I (real), odd positions hold Q (imag):
162 iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
163 qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
164 // Four-wide atan2(Q, I) to get the phase:
165 phase = atan2f4(qValue, iValue);
166 // Original author's note: when Q = 0 and I < 0, atan2f4 returns 2*pi instead of pi.
167 // Flag the lanes that differ from 2*pi; and/andnot/or keeps those and substitutes pi elsewhere:
168 keepMask = _mm_cmpneq_ps(phase, testVector);
169 phase = _mm_and_ps(phase, keepMask);
170 mask = _mm_andnot_ps(keepMask, correctVector);
171 phase = _mm_or_ps(phase, mask);
172 // End of the 2*pi correction; apply the normalization and store four results:
173 phase = _mm_mul_ps(phase, vNormalizeFactor);
174 _mm_store_ps((float*)outPtr, phase);
175 outPtr += 4;
176 }
177 number = quarterPoints * 4; // scalar loop resumes at the first sample the vector loop did not cover
178 #endif /* LV_HAVE_LIB_SIMDMATH */
179
180
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
181 262142 const float real = *complexVectorPtr++;
182 262142 const float imag = *complexVectorPtr++;
183 262142 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
184 }
185 2 }
186 #endif /* LV_HAVE_SSE */
187
188 #ifdef LV_HAVE_GENERIC
189
/*
 * Portable scalar kernel: for each of the num_points complex samples in
 * inputVector, writes atan2f(imag, real) * (1 / normalizeFactor) to
 * outputVector.
 *
 * \param outputVector    Receives one float per input sample.
 * \param inputVector     Input samples, read as interleaved (real, imag) floats.
 * \param normalizeFactor Each result is multiplied by 1.0 / normalizeFactor.
 * \param num_points      Number of complex samples to process.
 */
190 2 static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
191 const lv_32fc_t* inputVector,
192 const float normalizeFactor,
193 unsigned int num_points)
194 {
195 2 float* outPtr = outputVector;
196 2 const float* inPtr = (float*)inputVector; // walk the complex input as a flat float array
197 2 const float invNormalizeFactor = 1.0 / normalizeFactor; // reciprocal computed once, multiplied into every result
198 unsigned int number;
199
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
200 262142 const float real = *inPtr++; // first float of the pair
201 262142 const float imag = *inPtr++; // second float of the pair
202 262142 *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
203 }
204 2 }
205 #endif /* LV_HAVE_GENERIC */
206
207
208 #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
209