GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 31 32 96.9%
Functions: 3 3 100.0%
Branches: 5 6 83.3%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_s32f_x2_power_spectral_density_32f
12 *
13 * \b Overview
14 *
15 * Calculates the log10 power value divided by the RBW for each input point.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_32fc_s32f_x2_power_spectral_density_32f(float* logPowerOutput, const
20 * lv_32fc_t* complexFFTInput, const float normalizationFactor, const float rbw, unsigned
21 * int num_points) \endcode
22 *
23 * \b Inputs
24 * \li complexFFTInput The complex data output from the FFT point.
25 * \li normalizationFactor: This value is divided against all the input values before the
26 * power is calculated. \li rbw: The resolution bandwidth of the fft spectrum \li
27 * num_points: The number of fft data points.
28 *
29 * \b Outputs
30 * \li logPowerOutput: The 10.0 * log10((r*r + i*i)/RBW) for each data point.
31 *
32 * \b Example
33 * \code
34 * int N = 10000;
35 *
36 * volk_32fc_s32f_x2_power_spectral_density_32f();
37 *
38 * volk_free(x);
39 * \endcode
40 */
41
42 #ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
43 #define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
44
45 #include <inttypes.h>
46 #include <math.h>
47 #include <stdio.h>
48
49 #ifdef LV_HAVE_AVX
50 #include <immintrin.h>
51
52 #ifdef LV_HAVE_LIB_SIMDMATH
53 #include <simdmath.h>
54 #endif /* LV_HAVE_LIB_SIMDMATH */
55
56 static inline void
57 2 volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float* logPowerOutput,
58 const lv_32fc_t* complexFFTInput,
59 const float normalizationFactor,
60 const float rbw,
61 unsigned int num_points)
62 {
63 2 const float* inputPtr = (const float*)complexFFTInput;
64 2 float* destPtr = logPowerOutput;
65 2 uint64_t number = 0;
66 2 const float iRBW = 1.0 / rbw;
67 2 const float iNormalizationFactor = 1.0 / normalizationFactor;
68
69 #ifdef LV_HAVE_LIB_SIMDMATH
70 __m256 magScalar = _mm256_set1_ps(10.0);
71 magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
72
73 __m256 invRBW = _mm256_set1_ps(iRBW);
74
75 __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
76
77 __m256 power;
78 __m256 input1, input2;
79 const uint64_t eighthPoints = num_points / 8;
80 for (; number < eighthPoints; number++) {
81 // Load the complex values
82 input1 = _mm256_load_ps(inputPtr);
83 inputPtr += 8;
84 input2 = _mm256_load_ps(inputPtr);
85 inputPtr += 8;
86
87 // Apply the normalization factor
88 input1 = _mm256_mul_ps(input1, invNormalizationFactor);
89 input2 = _mm256_mul_ps(input2, invNormalizationFactor);
90
91 // Multiply each value by itself
92 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
93 input1 = _mm256_mul_ps(input1, input1);
94 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
95 input2 = _mm256_mul_ps(input2, input2);
96
97 // Horizontal add, to add (r*r) + (i*i) for each complex value
98 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
99 inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
100 inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
101
102 power = _mm256_hadd_ps(inputVal1, inputVal2);
103
104 // Divide by the rbw
105 power = _mm256_mul_ps(power, invRBW);
106
107 // Calculate the natural log power
108 power = logf4(power);
109
110 // Convert to log10 and multiply by 10.0
111 power = _mm256_mul_ps(power, magScalar);
112
113 // Store the floating point results
114 _mm256_store_ps(destPtr, power);
115
116 destPtr += 8;
117 }
118
119 number = eighthPoints * 8;
120 #endif /* LV_HAVE_LIB_SIMDMATH */
121 // Calculate the FFT for any remaining points
122
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
123 // Calculate dBm
124 // 50 ohm load assumption
125 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
126 // 75 ohm load assumption
127 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
128
129 262142 const float real = *inputPtr++ * iNormalizationFactor;
130 262142 const float imag = *inputPtr++ * iNormalizationFactor;
131
132 262142 *destPtr = volk_log2to10factor *
133 262142 log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
134 262142 destPtr++;
135 }
136 2 }
137 #endif /* LV_HAVE_AVX */
138
139 #ifdef LV_HAVE_SSE3
140 #include <pmmintrin.h>
141
142
143 #ifdef LV_HAVE_LIB_SIMDMATH
144 #include <simdmath.h>
145 #endif /* LV_HAVE_LIB_SIMDMATH */
146
147 static inline void
148 2 volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput,
149 const lv_32fc_t* complexFFTInput,
150 const float normalizationFactor,
151 const float rbw,
152 unsigned int num_points)
153 {
154 2 const float* inputPtr = (const float*)complexFFTInput;
155 2 float* destPtr = logPowerOutput;
156 2 uint64_t number = 0;
157 2 const float iRBW = 1.0 / rbw;
158 2 const float iNormalizationFactor = 1.0 / normalizationFactor;
159
160 #ifdef LV_HAVE_LIB_SIMDMATH
161 __m128 magScalar = _mm_set_ps1(10.0);
162 magScalar = _mm_div_ps(magScalar, logf4(magScalar));
163
164 __m128 invRBW = _mm_set_ps1(iRBW);
165
166 __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
167
168 __m128 power;
169 __m128 input1, input2;
170 const uint64_t quarterPoints = num_points / 4;
171 for (; number < quarterPoints; number++) {
172 // Load the complex values
173 input1 = _mm_load_ps(inputPtr);
174 inputPtr += 4;
175 input2 = _mm_load_ps(inputPtr);
176 inputPtr += 4;
177
178 // Apply the normalization factor
179 input1 = _mm_mul_ps(input1, invNormalizationFactor);
180 input2 = _mm_mul_ps(input2, invNormalizationFactor);
181
182 // Multiply each value by itself
183 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
184 input1 = _mm_mul_ps(input1, input1);
185 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
186 input2 = _mm_mul_ps(input2, input2);
187
188 // Horizontal add, to add (r*r) + (i*i) for each complex value
189 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
190 power = _mm_hadd_ps(input1, input2);
191
192 // Divide by the rbw
193 power = _mm_mul_ps(power, invRBW);
194
195 // Calculate the natural log power
196 power = logf4(power);
197
198 // Convert to log10 and multiply by 10.0
199 power = _mm_mul_ps(power, magScalar);
200
201 // Store the floating point results
202 _mm_store_ps(destPtr, power);
203
204 destPtr += 4;
205 }
206
207 number = quarterPoints * 4;
208 #endif /* LV_HAVE_LIB_SIMDMATH */
209 // Calculate the FFT for any remaining points
210
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
211 // Calculate dBm
212 // 50 ohm load assumption
213 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
214 // 75 ohm load assumption
215 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
216
217 262142 const float real = *inputPtr++ * iNormalizationFactor;
218 262142 const float imag = *inputPtr++ * iNormalizationFactor;
219
220 262142 *destPtr = volk_log2to10factor *
221 262142 log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
222 262142 destPtr++;
223 }
224 2 }
225 #endif /* LV_HAVE_SSE3 */
226
227
228 #ifdef LV_HAVE_GENERIC
229
230 static inline void
231 2 volk_32fc_s32f_x2_power_spectral_density_32f_generic(float* logPowerOutput,
232 const lv_32fc_t* complexFFTInput,
233 const float normalizationFactor,
234 const float rbw,
235 unsigned int num_points)
236 {
237
1/2
✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.
2 if (rbw != 1.0)
238 2 volk_32fc_s32f_power_spectrum_32f(
239 2 logPowerOutput, complexFFTInput, normalizationFactor * sqrt(rbw), num_points);
240 else
241 volk_32fc_s32f_power_spectrum_32f(
242 logPowerOutput, complexFFTInput, normalizationFactor, num_points);
243 2 }
244
245 #endif /* LV_HAVE_GENERIC */
246
247 #endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
248