GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	18	18	100.0%
Functions:	2	2	100.0%
Branches:	2	2	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_s32f_power_spectrum_32f
    
       *
    
       * \b Overview
    
       *
    
       * Calculates the log10 power value for each input point.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_s32f_power_spectrum_32f(float* logPowerOutput, const lv_32fc_t*
    
       * complexFFTInput, const float normalizationFactor, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexFFTInput The complex data output from the FFT point.
    
       * \li normalizationFactor: This value is divided against all the input values before the
    
       * power is calculated. \li num_points: The number of fft data points.
    
       *
    
       * \b Outputs
    
       * \li logPowerOutput: The 10.0 * log10(r*r + i*i) for each data point.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * volk_32fc_s32f_power_spectrum_32f();
    
       *
    
       * volk_free(x);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
    
      #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_GENERIC
    
      static inline void
    
      2
      volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput,
    
                                                const lv_32fc_t* complexFFTInput,
    
                                                const float normalizationFactor,
    
                                                unsigned int num_points)
    
      {
    
          // Calculate the Power of the complex point
    
      2
          const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
    
          // Calculate dBm
    
          // 50 ohm load assumption
    
          // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
    
          // 75 ohm load assumption
    
          // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
    
          /*
    
           * For generic reference, the code below is a volk-optimized
    
           * approach that also leverages a faster log2 calculation
    
           * to calculate the log10:
    
           * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
    
           *
    
           * Generic code:
    
           *
    
           * const float real = *inputPtr++ * iNormalizationFactor;
    
           * const float imag = *inputPtr++ * iNormalizationFactor;
    
           * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
    
           *  realFFTDataPointsPtr++;
    
           *
    
           */
    
          // Calc mag^2
    
      2
          volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
    
          // Finish ((real * real) + (imag * imag)) calculation:
    
      2
          volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
    
          // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
    
          // * log2(x)
    
      2
          volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
    
      2
          volk_32f_s32f_multiply_32f(
    
              logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
      #include <simdmath.h>
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
      static inline void
    
      4
      volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput,
    
                                               const lv_32fc_t* complexFFTInput,
    
                                               const float normalizationFactor,
    
                                               unsigned int num_points)
    
      {
    
      4
          const float* inputPtr = (const float*)complexFFTInput;
    
      4
          float* destPtr = logPowerOutput;
    
      4
          uint64_t number = 0;
    
      4
          const float iNormalizationFactor = 1.0 / normalizationFactor;
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
          __m128 magScalar = _mm_set_ps1(10.0);
    
          magScalar = _mm_div_ps(magScalar, logf4(magScalar));
    
          __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
    
          __m128 power;
    
          __m128 input1, input2;
    
          const uint64_t quarterPoints = num_points / 4;
    
          for (; number < quarterPoints; number++) {
    
              // Load the complex values
    
              input1 = _mm_load_ps(inputPtr);
    
              inputPtr += 4;
    
              input2 = _mm_load_ps(inputPtr);
    
              inputPtr += 4;
    
              // Apply the normalization factor
    
              input1 = _mm_mul_ps(input1, invNormalizationFactor);
    
              input2 = _mm_mul_ps(input2, invNormalizationFactor);
    
              // Multiply each value by itself
    
              // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
    
              input1 = _mm_mul_ps(input1, input1);
    
              // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
    
              input2 = _mm_mul_ps(input2, input2);
    
              // Horizontal add, to add (r*r) + (i*i) for each complex value
    
              // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
    
              power = _mm_hadd_ps(input1, input2);
    
              // Calculate the natural log power
    
              power = logf4(power);
    
              // Convert to log10 and multiply by 10.0
    
              power = _mm_mul_ps(power, magScalar);
    
              // Store the floating point results
    
              _mm_store_ps(destPtr, power);
    
              destPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
          // Calculate the FFT for any remaining points
    
        2/2✓ Branch 0 taken 524284 times.
✓ Branch 1 taken 4 times.

      524288
          for (; number < num_points; number++) {
    
              // Calculate dBm
    
              // 50 ohm load assumption
    
              // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
    
              // 75 ohm load assumption
    
              // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
    
      524284
              const float real = *inputPtr++ * iNormalizationFactor;
    
      524284
              const float imag = *inputPtr++ * iNormalizationFactor;
    
      524284
              *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
    
      524284
              destPtr++;
    
          }
    
      4
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      #include <volk/volk_neon_intrinsics.h>
    
      static inline void
    
      volk_32fc_s32f_power_spectrum_32f_neon(float* logPowerOutput,
    
                                             const lv_32fc_t* complexFFTInput,
    
                                             const float normalizationFactor,
    
                                             unsigned int num_points)
    
      {
    
          float* logPowerOutputPtr = logPowerOutput;
    
          const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
    
          const float iNormalizationFactor = 1.0 / normalizationFactor;
    
          unsigned int number;
    
          unsigned int quarter_points = num_points / 4;
    
          float32x4x2_t fft_vec;
    
          float32x4_t log_pwr_vec;
    
          float32x4_t mag_squared_vec;
    
          const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
    
          for (number = 0; number < quarter_points; number++) {
    
              // Load
    
              fft_vec = vld2q_f32((float*)complexFFTInputPtr);
    
              // Prefetch next 4
    
              __VOLK_PREFETCH(complexFFTInputPtr + 4);
    
              // Normalize
    
              fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
    
              fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
    
              mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
    
              log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
    
              // Store
    
              vst1q_f32(logPowerOutputPtr, log_pwr_vec);
    
              // Move pointers ahead
    
              complexFFTInputPtr += 4;
    
              logPowerOutputPtr += 4;
    
          }
    
          // deal with the rest
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
    
              const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
    
              *logPowerOutputPtr =
    
                  volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
    
              complexFFTInputPtr++;
    
              logPowerOutputPtr++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */

Line	Branch	Exec	Source
1			/* -- c++ -- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_s32f_power_spectrum_32f
12			*
13			* \b Overview
14			*
15			* Calculates the log10 power value for each input point.
16			*
17			* <b>Dispatcher Prototype</b>
18			* \code
19			* void volk_32fc_s32f_power_spectrum_32f(float* logPowerOutput, const lv_32fc_t*
20			* complexFFTInput, const float normalizationFactor, unsigned int num_points) \endcode
21			*
22			* \b Inputs
23			* \li complexFFTInput The complex data output from the FFT point.
24			* \li normalizationFactor: This value is divided against all the input values before the
25			* power is calculated. \li num_points: The number of fft data points.
26			*
27			* \b Outputs
28			* \li logPowerOutput: The 10.0 * log10(rr + ii) for each data point.
29			*
30			* \b Example
31			* \code
32			* int N = 10000;
33			*
34			* volk_32fc_s32f_power_spectrum_32f();
35			*
36			* volk_free(x);
37			* \endcode
38			*/
39
40			#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41			#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
42
43			#include <inttypes.h>
44			#include <math.h>
45			#include <stdio.h>
46
47			#ifdef LV_HAVE_GENERIC
48
49			static inline void
50		2	volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput,
51			const lv_32fc_t* complexFFTInput,
52			const float normalizationFactor,
53			unsigned int num_points)
54			{
55			// Calculate the Power of the complex point
56		2	const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
57
58			// Calculate dBm
59			// 50 ohm load assumption
60			// 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
61			// 75 ohm load assumption
62			// 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
63
64			/*
65			* For generic reference, the code below is a volk-optimized
66			* approach that also leverages a faster log2 calculation
67			* to calculate the log10:
68			* nlog10(x) = nlog2(x)/log2(10) = (n/log2(10)) * log2(x)
69			*
70			* Generic code:
71			*
72			* const float real = inputPtr++ iNormalizationFactor;
73			* const float imag = inputPtr++ iNormalizationFactor;
74			* realFFTDataPointsPtr = 10.0log10f(((real real) + (imag * imag)) + 1e-20);
75			* realFFTDataPointsPtr++;
76			*
77			*/
78
79			// Calc mag^2
80		2	volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
81
82			// Finish ((real * real) + (imag * imag)) calculation:
83		2	volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
84
85			// The following calculates 10log10(x) = 10log2(x)/log2(10) = (10/log2(10))
86			// * log2(x)
87		2	volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88		2	volk_32f_s32f_multiply_32f(
89			logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
90		2	}
91			#endif /* LV_HAVE_GENERIC */
92
93			#ifdef LV_HAVE_SSE3
94			#include <pmmintrin.h>
95
96			#ifdef LV_HAVE_LIB_SIMDMATH
97			#include <simdmath.h>
98			#endif /* LV_HAVE_LIB_SIMDMATH */
99
100			static inline void
101		4	volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput,
102			const lv_32fc_t* complexFFTInput,
103			const float normalizationFactor,
104			unsigned int num_points)
105			{
106		4	const float* inputPtr = (const float*)complexFFTInput;
107		4	float* destPtr = logPowerOutput;
108		4	uint64_t number = 0;
109		4	const float iNormalizationFactor = 1.0 / normalizationFactor;
110			#ifdef LV_HAVE_LIB_SIMDMATH
111			__m128 magScalar = _mm_set_ps1(10.0);
112			magScalar = _mm_div_ps(magScalar, logf4(magScalar));
113
114			__m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
115
116			__m128 power;
117			__m128 input1, input2;
118			const uint64_t quarterPoints = num_points / 4;
119			for (; number < quarterPoints; number++) {
120			// Load the complex values
121			input1 = _mm_load_ps(inputPtr);
122			inputPtr += 4;
123			input2 = _mm_load_ps(inputPtr);
124			inputPtr += 4;
125
126			// Apply the normalization factor
127			input1 = _mm_mul_ps(input1, invNormalizationFactor);
128			input2 = _mm_mul_ps(input2, invNormalizationFactor);
129
130			// Multiply each value by itself
131			// (r1r1), (i1i1), (r2r2), (i2i2)
132			input1 = _mm_mul_ps(input1, input1);
133			// (r3r3), (i3i3), (r4r4), (i4i4)
134			input2 = _mm_mul_ps(input2, input2);
135
136			// Horizontal add, to add (rr) + (ii) for each complex value
137			// (r1r1)+(i1i1), (r2r2) + (i2i2), (r3r3)+(i3i3), (r4r4)+(i4i4)
138			power = _mm_hadd_ps(input1, input2);
139
140			// Calculate the natural log power
141			power = logf4(power);
142
143			// Convert to log10 and multiply by 10.0
144			power = _mm_mul_ps(power, magScalar);
145
146			// Store the floating point results
147			_mm_store_ps(destPtr, power);
148
149			destPtr += 4;
150			}
151
152			number = quarterPoints * 4;
153			#endif /* LV_HAVE_LIB_SIMDMATH */
154			// Calculate the FFT for any remaining points
155
156	2/2 ✓ Branch 0 taken 524284 times. ✓ Branch 1 taken 4 times.	524288	for (; number < num_points; number++) {
157			// Calculate dBm
158			// 50 ohm load assumption
159			// 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
160			// 75 ohm load assumption
161			// 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
162
163		524284	const float real = inputPtr++ iNormalizationFactor;
164		524284	const float imag = inputPtr++ iNormalizationFactor;
165
166		524284	destPtr = volk_log2to10factor log2f_non_ieee(((real * real) + (imag * imag)));
167
168		524284	destPtr++;
169			}
170		4	}
171			#endif /* LV_HAVE_SSE3 */
172
173			#ifdef LV_HAVE_NEON
174			#include <arm_neon.h>
175			#include <volk/volk_neon_intrinsics.h>
176
177			static inline void
178			volk_32fc_s32f_power_spectrum_32f_neon(float* logPowerOutput,
179			const lv_32fc_t* complexFFTInput,
180			const float normalizationFactor,
181			unsigned int num_points)
182			{
183			float* logPowerOutputPtr = logPowerOutput;
184			const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
185			const float iNormalizationFactor = 1.0 / normalizationFactor;
186			unsigned int number;
187			unsigned int quarter_points = num_points / 4;
188			float32x4x2_t fft_vec;
189			float32x4_t log_pwr_vec;
190			float32x4_t mag_squared_vec;
191
192			const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
193
194			for (number = 0; number < quarter_points; number++) {
195			// Load
196			fft_vec = vld2q_f32((float*)complexFFTInputPtr);
197			// Prefetch next 4
198			__VOLK_PREFETCH(complexFFTInputPtr + 4);
199			// Normalize
200			fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
201			fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
202			mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
203			log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
204			// Store
205			vst1q_f32(logPowerOutputPtr, log_pwr_vec);
206			// Move pointers ahead
207			complexFFTInputPtr += 4;
208			logPowerOutputPtr += 4;
209			}
210
211			// deal with the rest
212			for (number = quarter_points * 4; number < num_points; number++) {
213			const float real = lv_creal(complexFFTInputPtr) iNormalizationFactor;
214			const float imag = lv_cimag(complexFFTInputPtr) iNormalizationFactor;
215
216			*logPowerOutputPtr =
217			volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
218			complexFFTInputPtr++;
219			logPowerOutputPtr++;
220			}
221			}
222
223			#endif /* LV_HAVE_NEON */
224
225			#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
226