GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 18 18 100.0%
Functions: 2 2 100.0%
Branches: 2 2 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32fc_s32f_power_spectrum_32f
12 *
13 * \b Overview
14 *
15 * Calculates the log10 power value for each input point.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_32fc_s32f_power_spectrum_32f(float* logPowerOutput, const lv_32fc_t*
20 * complexFFTInput, const float normalizationFactor, unsigned int num_points) \endcode
21 *
22 * \b Inputs
23 * \li complexFFTInput The complex data output from the FFT point.
24 * \li normalizationFactor: This value is divided against all the input values before the
25 * power is calculated. \li num_points: The number of fft data points.
26 *
27 * \b Outputs
28 * \li logPowerOutput: The 10.0 * log10(r*r + i*i) for each data point.
29 *
30 * \b Example
31 * \code
32 * int N = 10000;
33 *
34 * volk_32fc_s32f_power_spectrum_32f();
35 *
36 * volk_free(x);
37 * \endcode
38 */
39
40 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
42
43 #include <inttypes.h>
44 #include <math.h>
45 #include <stdio.h>
46
47 #ifdef LV_HAVE_GENERIC
48
49 static inline void
50 2 volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput,
51 const lv_32fc_t* complexFFTInput,
52 const float normalizationFactor,
53 unsigned int num_points)
54 {
55 // Calculate the Power of the complex point
56 2 const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
57
58 // Calculate dBm
59 // 50 ohm load assumption
60 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
61 // 75 ohm load assumption
62 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
63
64 /*
65 * For generic reference, the code below is a volk-optimized
66 * approach that also leverages a faster log2 calculation
67 * to calculate the log10:
68 * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
69 *
70 * Generic code:
71 *
72 * const float real = *inputPtr++ * iNormalizationFactor;
73 * const float imag = *inputPtr++ * iNormalizationFactor;
74 * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
75 * realFFTDataPointsPtr++;
76 *
77 */
78
79 // Calc mag^2
80 2 volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
81
82 // Finish ((real * real) + (imag * imag)) calculation:
83 2 volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
84
85 // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
86 // * log2(x)
87 2 volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88 2 volk_32f_s32f_multiply_32f(
89 logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
90 2 }
91 #endif /* LV_HAVE_GENERIC */
92
93 #ifdef LV_HAVE_SSE3
94 #include <pmmintrin.h>
95
96 #ifdef LV_HAVE_LIB_SIMDMATH
97 #include <simdmath.h>
98 #endif /* LV_HAVE_LIB_SIMDMATH */
99
100 static inline void
101 4 volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput,
102 const lv_32fc_t* complexFFTInput,
103 const float normalizationFactor,
104 unsigned int num_points)
105 {
106 4 const float* inputPtr = (const float*)complexFFTInput;
107 4 float* destPtr = logPowerOutput;
108 4 uint64_t number = 0;
109 4 const float iNormalizationFactor = 1.0 / normalizationFactor;
110 #ifdef LV_HAVE_LIB_SIMDMATH
111 __m128 magScalar = _mm_set_ps1(10.0);
112 magScalar = _mm_div_ps(magScalar, logf4(magScalar));
113
114 __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
115
116 __m128 power;
117 __m128 input1, input2;
118 const uint64_t quarterPoints = num_points / 4;
119 for (; number < quarterPoints; number++) {
120 // Load the complex values
121 input1 = _mm_load_ps(inputPtr);
122 inputPtr += 4;
123 input2 = _mm_load_ps(inputPtr);
124 inputPtr += 4;
125
126 // Apply the normalization factor
127 input1 = _mm_mul_ps(input1, invNormalizationFactor);
128 input2 = _mm_mul_ps(input2, invNormalizationFactor);
129
130 // Multiply each value by itself
131 // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
132 input1 = _mm_mul_ps(input1, input1);
133 // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
134 input2 = _mm_mul_ps(input2, input2);
135
136 // Horizontal add, to add (r*r) + (i*i) for each complex value
137 // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
138 power = _mm_hadd_ps(input1, input2);
139
140 // Calculate the natural log power
141 power = logf4(power);
142
143 // Convert to log10 and multiply by 10.0
144 power = _mm_mul_ps(power, magScalar);
145
146 // Store the floating point results
147 _mm_store_ps(destPtr, power);
148
149 destPtr += 4;
150 }
151
152 number = quarterPoints * 4;
153 #endif /* LV_HAVE_LIB_SIMDMATH */
154 // Calculate the FFT for any remaining points
155
156
2/2
✓ Branch 0 taken 524284 times.
✓ Branch 1 taken 4 times.
524288 for (; number < num_points; number++) {
157 // Calculate dBm
158 // 50 ohm load assumption
159 // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
160 // 75 ohm load assumption
161 // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
162
163 524284 const float real = *inputPtr++ * iNormalizationFactor;
164 524284 const float imag = *inputPtr++ * iNormalizationFactor;
165
166 524284 *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
167
168 524284 destPtr++;
169 }
170 4 }
171 #endif /* LV_HAVE_SSE3 */
172
173 #ifdef LV_HAVE_NEON
174 #include <arm_neon.h>
175 #include <volk/volk_neon_intrinsics.h>
176
177 static inline void
178 volk_32fc_s32f_power_spectrum_32f_neon(float* logPowerOutput,
179 const lv_32fc_t* complexFFTInput,
180 const float normalizationFactor,
181 unsigned int num_points)
182 {
183 float* logPowerOutputPtr = logPowerOutput;
184 const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
185 const float iNormalizationFactor = 1.0 / normalizationFactor;
186 unsigned int number;
187 unsigned int quarter_points = num_points / 4;
188 float32x4x2_t fft_vec;
189 float32x4_t log_pwr_vec;
190 float32x4_t mag_squared_vec;
191
192 const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
193
194 for (number = 0; number < quarter_points; number++) {
195 // Load
196 fft_vec = vld2q_f32((float*)complexFFTInputPtr);
197 // Prefetch next 4
198 __VOLK_PREFETCH(complexFFTInputPtr + 4);
199 // Normalize
200 fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
201 fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
202 mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
203 log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
204 // Store
205 vst1q_f32(logPowerOutputPtr, log_pwr_vec);
206 // Move pointers ahead
207 complexFFTInputPtr += 4;
208 logPowerOutputPtr += 4;
209 }
210
211 // deal with the rest
212 for (number = quarter_points * 4; number < num_points; number++) {
213 const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
214 const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
215
216 *logPowerOutputPtr =
217 volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
218 complexFFTInputPtr++;
219 logPowerOutputPtr++;
220 }
221 }
222
223 #endif /* LV_HAVE_NEON */
224
225 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
226