GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_s32f_atan2_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	29	29	100.0%
Functions:	3	3	100.0%
Branches:	6	6	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_s32f_atan2_32f
    
       *
    
       * \b Overview
    
       *
    
       * Computes the arctan for each value in a complex vector and applies
    
       * a normalization factor.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_s32f_atan2_32f(float* outputVector, const lv_32fc_t* complexVector,
    
       * const float normalizeFactor, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputVector: The byte-aligned input vector containing interleaved IQ data (I = cos,
    
       * Q = sin). \li normalizeFactor: The atan results are divided by this normalization
    
       * factor. \li num_points: The number of complex values in \p inputVector.
    
       *
    
       * \b Outputs
    
       * \li outputVector: The vector where the results will be stored.
    
       *
    
       * \b Example
    
       * Calculate the arctangent of points around the unit circle.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float scale = 1.f; // we want unit circle
    
       *
    
       *   for(unsigned int ii = 0; ii < N/2; ++ii){
    
       *       // Generate points around the unit circle
    
       *       float real = -4.f * ((float)ii / (float)N) + 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *       in[ii+N/2] = lv_cmake(-real, -imag);
    
       *   }
    
       *
    
       *   volk_32fc_s32f_atan2_32f(out, in, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("atan2(%1.2f, %1.2f) = %1.2f\n",
    
       *           lv_cimag(in[ii]), lv_creal(in[ii]), out[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
    
      #define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE4_1
    
      #include <smmintrin.h>
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
      #include <simdmath.h>
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
      2
      static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
    
                                                           const lv_32fc_t* complexVector,
    
                                                           const float normalizeFactor,
    
                                                           unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* outPtr = outputVector;
    
      2
          unsigned int number = 0;
    
      2
          const float invNormalizeFactor = 1.0 / normalizeFactor;
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 testVector = _mm_set_ps1(2 * M_PI);
    
          __m128 correctVector = _mm_set_ps1(M_PI);
    
          __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
    
          __m128 phase;
    
          __m128 complex1, complex2, iValue, qValue;
    
          __m128 keepMask;
    
          for (; number < quarterPoints; number++) {
    
              // Load IQ data:
    
              complex1 = _mm_load_ps(complexVectorPtr);
    
              complexVectorPtr += 4;
    
              complex2 = _mm_load_ps(complexVectorPtr);
    
              complexVectorPtr += 4;
    
              // Deinterleave IQ data:
    
              iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
    
              qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
    
              // Arctan to get phase:
    
              phase = atan2f4(qValue, iValue);
    
              // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
    
              // Compare to 2pi:
    
              keepMask = _mm_cmpneq_ps(phase, testVector);
    
              phase = _mm_blendv_ps(correctVector, phase, keepMask);
    
              // done with above correction.
    
              phase = _mm_mul_ps(phase, vNormalizeFactor);
    
              _mm_store_ps((float*)outPtr, phase);
    
              outPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE4_1 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
      #include <simdmath.h>
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
      2
      static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
    
                                                        const lv_32fc_t* complexVector,
    
                                                        const float normalizeFactor,
    
                                                        unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* outPtr = outputVector;
    
      2
          unsigned int number = 0;
    
      2
          const float invNormalizeFactor = 1.0 / normalizeFactor;
    
      #ifdef LV_HAVE_LIB_SIMDMATH
    
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 testVector = _mm_set_ps1(2 * M_PI);
    
          __m128 correctVector = _mm_set_ps1(M_PI);
    
          __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
    
          __m128 phase;
    
          __m128 complex1, complex2, iValue, qValue;
    
          __m128 mask;
    
          __m128 keepMask;
    
          for (; number < quarterPoints; number++) {
    
              // Load IQ data:
    
              complex1 = _mm_load_ps(complexVectorPtr);
    
              complexVectorPtr += 4;
    
              complex2 = _mm_load_ps(complexVectorPtr);
    
              complexVectorPtr += 4;
    
              // Deinterleave IQ data:
    
              iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
    
              qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
    
              // Arctan to get phase:
    
              phase = atan2f4(qValue, iValue);
    
              // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
    
              // Compare to 2pi:
    
              keepMask = _mm_cmpneq_ps(phase, testVector);
    
              phase = _mm_and_ps(phase, keepMask);
    
              mask = _mm_andnot_ps(keepMask, correctVector);
    
              phase = _mm_or_ps(phase, mask);
    
              // done with above correction.
    
              phase = _mm_mul_ps(phase, vNormalizeFactor);
    
              _mm_store_ps((float*)outPtr, phase);
    
              outPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
      #endif /* LV_HAVE_LIB_SIMDMATH */
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
    
                                                          const lv_32fc_t* inputVector,
    
                                                          const float normalizeFactor,
    
                                                          unsigned int num_points)
    
      {
    
      2
          float* outPtr = outputVector;
    
      2
          const float* inPtr = (float*)inputVector;
    
      2
          const float invNormalizeFactor = 1.0 / normalizeFactor;
    
          unsigned int number;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const float real = *inPtr++;
    
      262142
              const float imag = *inPtr++;
    
      262142
              *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_s32f_atan2_32f
12			*
13			* \b Overview
14			*
15			* Computes the arctan for each value in a complex vector and applies
16			* a normalization factor.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_s32f_atan2_32f(float* outputVector, const lv_32fc_t* complexVector,
21			* const float normalizeFactor, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li inputVector: The byte-aligned input vector containing interleaved IQ data (I = cos,
25			* Q = sin). \li normalizeFactor: The atan results are divided by this normalization
26			* factor. \li num_points: The number of complex values in \p inputVector.
27			*
28			* \b Outputs
29			* \li outputVector: The vector where the results will be stored.
30			*
31			* \b Example
32			* Calculate the arctangent of points around the unit circle.
33			* \code
34			* int N = 10;
35			* unsigned int alignment = volk_get_alignment();
36			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37			* float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
38			* float scale = 1.f; // we want unit circle
39			*
40			* for(unsigned int ii = 0; ii < N/2; ++ii){
41			* // Generate points around the unit circle
42			* float real = -4.f * ((float)ii / (float)N) + 1.f;
43			* float imag = std::sqrt(1.f - real * real);
44			* in[ii] = lv_cmake(real, imag);
45			* in[ii+N/2] = lv_cmake(-real, -imag);
46			* }
47			*
48			* volk_32fc_s32f_atan2_32f(out, in, scale, N);
49			*
50			* for(unsigned int ii = 0; ii < N; ++ii){
51			* printf("atan2(%1.2f, %1.2f) = %1.2f\n",
52			* lv_cimag(in[ii]), lv_creal(in[ii]), out[ii]);
53			* }
54			*
55			* volk_free(in);
56			* volk_free(out);
57			* \endcode
58			*/
59
60
61			#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
62			#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
63
64			#include <inttypes.h>
65			#include <math.h>
66			#include <stdio.h>
67
68			#ifdef LV_HAVE_SSE4_1
69			#include <smmintrin.h>
70
71			#ifdef LV_HAVE_LIB_SIMDMATH
72			#include <simdmath.h>
73			#endif /* LV_HAVE_LIB_SIMDMATH */
74
75		2	static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,
76			const lv_32fc_t* complexVector,
77			const float normalizeFactor,
78			unsigned int num_points)
79			{
80		2	const float* complexVectorPtr = (float*)complexVector;
81		2	float* outPtr = outputVector;
82
83		2	unsigned int number = 0;
84		2	const float invNormalizeFactor = 1.0 / normalizeFactor;
85
86			#ifdef LV_HAVE_LIB_SIMDMATH
87			const unsigned int quarterPoints = num_points / 4;
88			__m128 testVector = _mm_set_ps1(2 * M_PI);
89			__m128 correctVector = _mm_set_ps1(M_PI);
90			__m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
91			__m128 phase;
92			__m128 complex1, complex2, iValue, qValue;
93			__m128 keepMask;
94
95			for (; number < quarterPoints; number++) {
96			// Load IQ data:
97			complex1 = _mm_load_ps(complexVectorPtr);
98			complexVectorPtr += 4;
99			complex2 = _mm_load_ps(complexVectorPtr);
100			complexVectorPtr += 4;
101			// Deinterleave IQ data:
102			iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
103			qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
104			// Arctan to get phase:
105			phase = atan2f4(qValue, iValue);
106			// When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
107			// Compare to 2pi:
108			keepMask = _mm_cmpneq_ps(phase, testVector);
109			phase = _mm_blendv_ps(correctVector, phase, keepMask);
110			// done with above correction.
111			phase = _mm_mul_ps(phase, vNormalizeFactor);
112			_mm_store_ps((float*)outPtr, phase);
113			outPtr += 4;
114			}
115			number = quarterPoints * 4;
116			#endif /* LV_HAVE_LIB_SIMDMATH */
117
118	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (; number < num_points; number++) {
119		262142	const float real = *complexVectorPtr++;
120		262142	const float imag = *complexVectorPtr++;
121		262142	*outPtr++ = atan2f(imag, real) * invNormalizeFactor;
122			}
123		2	}
124			#endif /* LV_HAVE_SSE4_1 */
125
126
127			#ifdef LV_HAVE_SSE
128			#include <xmmintrin.h>
129
130			#ifdef LV_HAVE_LIB_SIMDMATH
131			#include <simdmath.h>
132			#endif /* LV_HAVE_LIB_SIMDMATH */
133
134		2	static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,
135			const lv_32fc_t* complexVector,
136			const float normalizeFactor,
137			unsigned int num_points)
138			{
139		2	const float* complexVectorPtr = (float*)complexVector;
140		2	float* outPtr = outputVector;
141
142		2	unsigned int number = 0;
143		2	const float invNormalizeFactor = 1.0 / normalizeFactor;
144
145			#ifdef LV_HAVE_LIB_SIMDMATH
146			const unsigned int quarterPoints = num_points / 4;
147			__m128 testVector = _mm_set_ps1(2 * M_PI);
148			__m128 correctVector = _mm_set_ps1(M_PI);
149			__m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
150			__m128 phase;
151			__m128 complex1, complex2, iValue, qValue;
152			__m128 mask;
153			__m128 keepMask;
154
155			for (; number < quarterPoints; number++) {
156			// Load IQ data:
157			complex1 = _mm_load_ps(complexVectorPtr);
158			complexVectorPtr += 4;
159			complex2 = _mm_load_ps(complexVectorPtr);
160			complexVectorPtr += 4;
161			// Deinterleave IQ data:
162			iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2, 0, 2, 0));
163			qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3, 1, 3, 1));
164			// Arctan to get phase:
165			phase = atan2f4(qValue, iValue);
166			// When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
167			// Compare to 2pi:
168			keepMask = _mm_cmpneq_ps(phase, testVector);
169			phase = _mm_and_ps(phase, keepMask);
170			mask = _mm_andnot_ps(keepMask, correctVector);
171			phase = _mm_or_ps(phase, mask);
172			// done with above correction.
173			phase = _mm_mul_ps(phase, vNormalizeFactor);
174			_mm_store_ps((float*)outPtr, phase);
175			outPtr += 4;
176			}
177			number = quarterPoints * 4;
178			#endif /* LV_HAVE_LIB_SIMDMATH */
179
180	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (; number < num_points; number++) {
181		262142	const float real = *complexVectorPtr++;
182		262142	const float imag = *complexVectorPtr++;
183		262142	*outPtr++ = atan2f(imag, real) * invNormalizeFactor;
184			}
185		2	}
186			#endif /* LV_HAVE_SSE */
187
188			#ifdef LV_HAVE_GENERIC
189
190		2	static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector,
191			const lv_32fc_t* inputVector,
192			const float normalizeFactor,
193			unsigned int num_points)
194			{
195		2	float* outPtr = outputVector;
196		2	const float* inPtr = (float*)inputVector;
197		2	const float invNormalizeFactor = 1.0 / normalizeFactor;
198			unsigned int number;
199	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
200		262142	const float real = *inPtr++;
201		262142	const float imag = *inPtr++;
202		262142	*outPtr++ = atan2f(imag, real) * invNormalizeFactor;
203			}
204		2	}
205			#endif /* LV_HAVE_GENERIC */
206
207
208			#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
209