GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_magnitude_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	125	125	100.0%
Functions:	8	8	100.0%
Branches:	28	28	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_magnitude_32f
    
       *
    
       * \b Overview
    
       *
    
       * Calculates the magnitude of the complexVector and stores the
    
       * results in the magnitudeVector.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_magnitude_32f(float* magnitudeVector, const lv_32fc_t* complexVector,
    
       * unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of samples.
    
       *
    
       * \b Outputs
    
       * \li magnitudeVector: The output value.
    
       *
    
       * \b Example
    
       * Calculate the magnitude of \f$x^2 + x\f$ for points around the unit circle.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N/2; ++ii){
    
       *       float real = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *       in[ii] = in[ii] * in[ii] + in[ii];
    
       *       in[N-ii] = lv_cmake(real, imag);
    
       *       in[N-ii] = in[N-ii] * in[N-ii] + in[N-ii];
    
       *   }
    
       *
    
       *   volk_32fc_magnitude_32f(magnitude, in, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(magnitude);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
    
      #define INCLUDED_volk_32fc_magnitude_32f_u_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <volk/volk_avx_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
    
                                                       const lv_32fc_t* complexVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m256 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
    
      32766
              result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
    
              _mm256_storeu_ps(magnitudeVectorPtr, result);
    
      32766
              complexVectorPtr += 16;
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float val1Real = *complexVectorPtr++;
    
      14
              float val1Imag = *complexVectorPtr++;
    
      14
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      #include <volk/volk_sse3_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
    
                                                        const lv_32fc_t* complexVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
    
              _mm_storeu_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <volk/volk_sse_intrinsics.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
    
                                                       const lv_32fc_t* complexVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitude_ps(cplxValue1, cplxValue2);
    
              _mm_storeu_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
    
                                                         const lv_32fc_t* complexVector,
    
                                                         unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
    
      #ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
    
      #define INCLUDED_volk_32fc_magnitude_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <volk/volk_avx_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
    
                                                       const lv_32fc_t* complexVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m256 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
    
              _mm256_store_ps(magnitudeVectorPtr, result);
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float val1Real = *complexVectorPtr++;
    
      14
              float val1Imag = *complexVectorPtr++;
    
      14
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      #include <volk/volk_sse3_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
    
                                                        const lv_32fc_t* complexVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <volk/volk_sse_intrinsics.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
    
                                                       const lv_32fc_t* complexVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitude_ps(cplxValue1, cplxValue2);
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
    
                                                           const lv_32fc_t* complexVector,
    
                                                           unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
    
                                                      const lv_32fc_t* complexVector,
    
                                                      unsigned int num_points)
    
      {
    
          unsigned int number;
    
          unsigned int quarter_points = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          float* magnitudeVectorPtr = magnitudeVector;
    
          float32x4x2_t complex_vec;
    
          float32x4_t magnitude_vec;
    
          for (number = 0; number < quarter_points; number++) {
    
              complex_vec = vld2q_f32(complexVectorPtr);
    
              complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
    
              magnitude_vec =
    
                  vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
    
              magnitude_vec = vrsqrteq_f32(magnitude_vec);
    
              magnitude_vec = vrecpeq_f32(magnitude_vec); // no plain ol' sqrt
    
              vst1q_f32(magnitudeVectorPtr, magnitude_vec);
    
              complexVectorPtr += 8;
    
              magnitudeVectorPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              const float real = *complexVectorPtr++;
    
              const float imag = *complexVectorPtr++;
    
              *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_NEON
    
      /*!
    
        \brief Calculates the magnitude of the complexVector and stores the results in the
    
        magnitudeVector
    
        This is an approximation from "Streamlining Digital Signal Processing" by
    
        Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
    
        The basic idea is to do a weighted sum of the abs. value of imag and real parts
    
        where weight A is always assigned to max(imag, real) and B is always min(imag,real).
    
        There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
    
        This method is called equiripple-error magnitude estimation proposed by Filip in '73
    
        \param complexVector The vector containing the complex input values
    
        \param magnitudeVector The vector containing the real output values
    
        \param num_points The number of complex values in complexVector to be calculated and
    
        stored into cVector
    
      */
    
      static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(
    
          float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
    
      {
    
          unsigned int number;
    
          unsigned int quarter_points = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          float* magnitudeVectorPtr = magnitudeVector;
    
          const float threshold = 0.4142135;
    
          float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
    
          a_high = vdupq_n_f32(0.84);
    
          b_high = vdupq_n_f32(0.561);
    
          a_low = vdupq_n_f32(0.99);
    
          b_low = vdupq_n_f32(0.197);
    
          uint32x4_t comp0, comp1;
    
          float32x4x2_t complex_vec;
    
          float32x4_t min_vec, max_vec, magnitude_vec;
    
          float32x4_t real_abs, imag_abs;
    
          for (number = 0; number < quarter_points; number++) {
    
              complex_vec = vld2q_f32(complexVectorPtr);
    
              real_abs = vabsq_f32(complex_vec.val[0]);
    
              imag_abs = vabsq_f32(complex_vec.val[1]);
    
              min_vec = vminq_f32(real_abs, imag_abs);
    
              max_vec = vmaxq_f32(real_abs, imag_abs);
    
              // effective branch to choose coefficient pair.
    
              comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
    
              comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
    
              // and 0s or 1s with coefficients from previous effective branch
    
              a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
    
                                             vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
    
              b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
    
                                             vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
    
              // coefficients chosen, do the weighted sum
    
              min_vec = vmulq_f32(min_vec, b_vec);
    
              max_vec = vmulq_f32(max_vec, a_vec);
    
              magnitude_vec = vaddq_f32(min_vec, max_vec);
    
              vst1q_f32(magnitudeVectorPtr, magnitude_vec);
    
              complexVectorPtr += 8;
    
              magnitudeVectorPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              const float real = *complexVectorPtr++;
    
              const float imag = *complexVectorPtr++;
    
              *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */

Line	Branch	Exec	Source
1			/* -- c++ -- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_magnitude_32f
12			*
13			* \b Overview
14			*
15			* Calculates the magnitude of the complexVector and stores the
16			* results in the magnitudeVector.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_magnitude_32f(float* magnitudeVector, const lv_32fc_t* complexVector,
21			* unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li num_points: The number of samples.
26			*
27			* \b Outputs
28			* \li magnitudeVector: The output value.
29			*
30			* \b Example
31			* Calculate the magnitude of \f$x^2 + x\f$ for points around the unit circle.
32			* \code
33			* int N = 10;
34			* unsigned int alignment = volk_get_alignment();
35			* lv_32fc_t* in = (lv_32fc_t)volk_malloc(sizeof(lv_32fc_t)N, alignment);
36			* float* magnitude = (float)volk_malloc(sizeof(float)N, alignment);
37			*
38			* for(unsigned int ii = 0; ii < N/2; ++ii){
39			* float real = 2.f * ((float)ii / (float)N) - 1.f;
40			* float imag = std::sqrt(1.f - real * real);
41			* in[ii] = lv_cmake(real, imag);
42			* in[ii] = in[ii] * in[ii] + in[ii];
43			* in[N-ii] = lv_cmake(real, imag);
44			* in[N-ii] = in[N-ii] * in[N-ii] + in[N-ii];
45			* }
46			*
47			* volk_32fc_magnitude_32f(magnitude, in, N);
48			*
49			* for(unsigned int ii = 0; ii < N; ++ii){
50			* printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
51			* }
52			*
53			* volk_free(in);
54			* volk_free(magnitude);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
59			#define INCLUDED_volk_32fc_magnitude_32f_u_H
60
61			#include <inttypes.h>
62			#include <math.h>
63			#include <stdio.h>
64
65			#ifdef LV_HAVE_AVX
66			#include <immintrin.h>
67			#include <volk/volk_avx_intrinsics.h>
68
69		2	static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
70			const lv_32fc_t* complexVector,
71			unsigned int num_points)
72			{
73		2	unsigned int number = 0;
74		2	const unsigned int eighthPoints = num_points / 8;
75
76		2	const float* complexVectorPtr = (float*)complexVector;
77		2	float* magnitudeVectorPtr = magnitudeVector;
78
79			__m256 cplxValue1, cplxValue2, result;
80
81	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
82		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
84		32766	result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
85			_mm256_storeu_ps(magnitudeVectorPtr, result);
86
87		32766	complexVectorPtr += 16;
88		32766	magnitudeVectorPtr += 8;
89			}
90
91		2	number = eighthPoints * 8;
92	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
93		14	float val1Real = *complexVectorPtr++;
94		14	float val1Imag = *complexVectorPtr++;
95		14	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
96			}
97		2	}
98			#endif /* LV_HAVE_AVX */
99
100			#ifdef LV_HAVE_SSE3
101			#include <pmmintrin.h>
102			#include <volk/volk_sse3_intrinsics.h>
103
104		2	static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
105			const lv_32fc_t* complexVector,
106			unsigned int num_points)
107			{
108		2	unsigned int number = 0;
109		2	const unsigned int quarterPoints = num_points / 4;
110
111		2	const float* complexVectorPtr = (float*)complexVector;
112		2	float* magnitudeVectorPtr = magnitudeVector;
113
114			__m128 cplxValue1, cplxValue2, result;
115	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
116		65534	cplxValue1 = _mm_loadu_ps(complexVectorPtr);
117		65534	complexVectorPtr += 4;
118
119		65534	cplxValue2 = _mm_loadu_ps(complexVectorPtr);
120		65534	complexVectorPtr += 4;
121
122		65534	result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
123
124			_mm_storeu_ps(magnitudeVectorPtr, result);
125		65534	magnitudeVectorPtr += 4;
126			}
127
128		2	number = quarterPoints * 4;
129	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
130		6	float val1Real = *complexVectorPtr++;
131		6	float val1Imag = *complexVectorPtr++;
132		6	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
133			}
134		2	}
135			#endif /* LV_HAVE_SSE3 */
136
137
138			#ifdef LV_HAVE_SSE
139			#include <volk/volk_sse_intrinsics.h>
140			#include <xmmintrin.h>
141
142		2	static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
143			const lv_32fc_t* complexVector,
144			unsigned int num_points)
145			{
146		2	unsigned int number = 0;
147		2	const unsigned int quarterPoints = num_points / 4;
148
149		2	const float* complexVectorPtr = (float*)complexVector;
150		2	float* magnitudeVectorPtr = magnitudeVector;
151
152			__m128 cplxValue1, cplxValue2, result;
153
154	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
155		65534	cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156		65534	complexVectorPtr += 4;
157
158		65534	cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159		65534	complexVectorPtr += 4;
160
161		65534	result = _mm_magnitude_ps(cplxValue1, cplxValue2);
162			_mm_storeu_ps(magnitudeVectorPtr, result);
163		65534	magnitudeVectorPtr += 4;
164			}
165
166		2	number = quarterPoints * 4;
167	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
168		6	float val1Real = *complexVectorPtr++;
169		6	float val1Imag = *complexVectorPtr++;
170		6	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
171			}
172		2	}
173			#endif /* LV_HAVE_SSE */
174
175
176			#ifdef LV_HAVE_GENERIC
177
178		2	static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
179			const lv_32fc_t* complexVector,
180			unsigned int num_points)
181			{
182		2	const float* complexVectorPtr = (float*)complexVector;
183		2	float* magnitudeVectorPtr = magnitudeVector;
184		2	unsigned int number = 0;
185	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
186		262142	const float real = *complexVectorPtr++;
187		262142	const float imag = *complexVectorPtr++;
188		262142	magnitudeVectorPtr++ = sqrtf((real real) + (imag * imag));
189			}
190		2	}
191			#endif /* LV_HAVE_GENERIC */
192
193
194			#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
195			#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
196			#define INCLUDED_volk_32fc_magnitude_32f_a_H
197
198			#include <inttypes.h>
199			#include <math.h>
200			#include <stdio.h>
201
202			#ifdef LV_HAVE_AVX
203			#include <immintrin.h>
204			#include <volk/volk_avx_intrinsics.h>
205
206		2	static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
207			const lv_32fc_t* complexVector,
208			unsigned int num_points)
209			{
210		2	unsigned int number = 0;
211		2	const unsigned int eighthPoints = num_points / 8;
212
213		2	const float* complexVectorPtr = (float*)complexVector;
214		2	float* magnitudeVectorPtr = magnitudeVector;
215
216			__m256 cplxValue1, cplxValue2, result;
217	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
218		32766	cplxValue1 = _mm256_load_ps(complexVectorPtr);
219		32766	complexVectorPtr += 8;
220
221		32766	cplxValue2 = _mm256_load_ps(complexVectorPtr);
222		32766	complexVectorPtr += 8;
223
224		32766	result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
225			_mm256_store_ps(magnitudeVectorPtr, result);
226		32766	magnitudeVectorPtr += 8;
227			}
228
229		2	number = eighthPoints * 8;
230	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
231		14	float val1Real = *complexVectorPtr++;
232		14	float val1Imag = *complexVectorPtr++;
233		14	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
234			}
235		2	}
236			#endif /* LV_HAVE_AVX */
237
238			#ifdef LV_HAVE_SSE3
239			#include <pmmintrin.h>
240			#include <volk/volk_sse3_intrinsics.h>
241
242		2	static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
243			const lv_32fc_t* complexVector,
244			unsigned int num_points)
245			{
246		2	unsigned int number = 0;
247		2	const unsigned int quarterPoints = num_points / 4;
248
249		2	const float* complexVectorPtr = (float*)complexVector;
250		2	float* magnitudeVectorPtr = magnitudeVector;
251
252			__m128 cplxValue1, cplxValue2, result;
253	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
254		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
255		65534	complexVectorPtr += 4;
256
257		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
258		65534	complexVectorPtr += 4;
259
260		65534	result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
261			_mm_store_ps(magnitudeVectorPtr, result);
262		65534	magnitudeVectorPtr += 4;
263			}
264
265		2	number = quarterPoints * 4;
266	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
267		6	float val1Real = *complexVectorPtr++;
268		6	float val1Imag = *complexVectorPtr++;
269		6	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
270			}
271		2	}
272			#endif /* LV_HAVE_SSE3 */
273
274			#ifdef LV_HAVE_SSE
275			#include <volk/volk_sse_intrinsics.h>
276			#include <xmmintrin.h>
277
278		2	static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
279			const lv_32fc_t* complexVector,
280			unsigned int num_points)
281			{
282		2	unsigned int number = 0;
283		2	const unsigned int quarterPoints = num_points / 4;
284
285		2	const float* complexVectorPtr = (float*)complexVector;
286		2	float* magnitudeVectorPtr = magnitudeVector;
287
288			__m128 cplxValue1, cplxValue2, result;
289	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
290		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
291		65534	complexVectorPtr += 4;
292
293		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
294		65534	complexVectorPtr += 4;
295
296		65534	result = _mm_magnitude_ps(cplxValue1, cplxValue2);
297			_mm_store_ps(magnitudeVectorPtr, result);
298		65534	magnitudeVectorPtr += 4;
299			}
300
301		2	number = quarterPoints * 4;
302	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
303		6	float val1Real = *complexVectorPtr++;
304		6	float val1Imag = *complexVectorPtr++;
305		6	magnitudeVectorPtr++ = sqrtf((val1Real val1Real) + (val1Imag * val1Imag));
306			}
307		2	}
308			#endif /* LV_HAVE_SSE */
309
310
311			#ifdef LV_HAVE_GENERIC
312
313		2	static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
314			const lv_32fc_t* complexVector,
315			unsigned int num_points)
316			{
317		2	const float* complexVectorPtr = (float*)complexVector;
318		2	float* magnitudeVectorPtr = magnitudeVector;
319		2	unsigned int number = 0;
320	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
321		262142	const float real = *complexVectorPtr++;
322		262142	const float imag = *complexVectorPtr++;
323		262142	magnitudeVectorPtr++ = sqrtf((real real) + (imag * imag));
324			}
325		2	}
326			#endif /* LV_HAVE_GENERIC */
327
328
329			#ifdef LV_HAVE_NEON
330			#include <arm_neon.h>
331
332			static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
333			const lv_32fc_t* complexVector,
334			unsigned int num_points)
335			{
336			unsigned int number;
337			unsigned int quarter_points = num_points / 4;
338			const float* complexVectorPtr = (float*)complexVector;
339			float* magnitudeVectorPtr = magnitudeVector;
340
341			float32x4x2_t complex_vec;
342			float32x4_t magnitude_vec;
343			for (number = 0; number < quarter_points; number++) {
344			complex_vec = vld2q_f32(complexVectorPtr);
345			complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
346			magnitude_vec =
347			vmlaq_f32(complex_vec.val[0], complex_vec.val[1], complex_vec.val[1]);
348			magnitude_vec = vrsqrteq_f32(magnitude_vec);
349			magnitude_vec = vrecpeq_f32(magnitude_vec); // no plain ol' sqrt
350			vst1q_f32(magnitudeVectorPtr, magnitude_vec);
351
352			complexVectorPtr += 8;
353			magnitudeVectorPtr += 4;
354			}
355
356			for (number = quarter_points * 4; number < num_points; number++) {
357			const float real = *complexVectorPtr++;
358			const float imag = *complexVectorPtr++;
359			magnitudeVectorPtr++ = sqrtf((real real) + (imag * imag));
360			}
361			}
362			#endif /* LV_HAVE_NEON */
363
364
365			#ifdef LV_HAVE_NEON
366			/*!
367			\brief Calculates the magnitude of the complexVector and stores the results in the
368			magnitudeVector
369
370			This is an approximation from "Streamlining Digital Signal Processing" by
371			Richard Lyons. Apparently max error is about 1% and mean error is about 0.6%.
372			The basic idea is to do a weighted sum of the abs. value of imag and real parts
373			where weight A is always assigned to max(imag, real) and B is always min(imag,real).
374			There are two pairs of cofficients chosen based on whether min <= 0.4142 max.
375			This method is called equiripple-error magnitude estimation proposed by Filip in '73
376
377			\param complexVector The vector containing the complex input values
378			\param magnitudeVector The vector containing the real output values
379			\param num_points The number of complex values in complexVector to be calculated and
380			stored into cVector
381			*/
382			static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(
383			float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
384			{
385			unsigned int number;
386			unsigned int quarter_points = num_points / 4;
387			const float* complexVectorPtr = (float*)complexVector;
388			float* magnitudeVectorPtr = magnitudeVector;
389
390			const float threshold = 0.4142135;
391
392			float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
393			a_high = vdupq_n_f32(0.84);
394			b_high = vdupq_n_f32(0.561);
395			a_low = vdupq_n_f32(0.99);
396			b_low = vdupq_n_f32(0.197);
397
398			uint32x4_t comp0, comp1;
399
400			float32x4x2_t complex_vec;
401			float32x4_t min_vec, max_vec, magnitude_vec;
402			float32x4_t real_abs, imag_abs;
403			for (number = 0; number < quarter_points; number++) {
404			complex_vec = vld2q_f32(complexVectorPtr);
405
406			real_abs = vabsq_f32(complex_vec.val[0]);
407			imag_abs = vabsq_f32(complex_vec.val[1]);
408
409			min_vec = vminq_f32(real_abs, imag_abs);
410			max_vec = vmaxq_f32(real_abs, imag_abs);
411
412			// effective branch to choose coefficient pair.
413			comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
414			comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
415
416			// and 0s or 1s with coefficients from previous effective branch
417			a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)a_high),
418			vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
419			b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, (int32x4_t)b_high),
420			vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
421
422			// coefficients chosen, do the weighted sum
423			min_vec = vmulq_f32(min_vec, b_vec);
424			max_vec = vmulq_f32(max_vec, a_vec);
425
426			magnitude_vec = vaddq_f32(min_vec, max_vec);
427			vst1q_f32(magnitudeVectorPtr, magnitude_vec);
428
429			complexVectorPtr += 8;
430			magnitudeVectorPtr += 4;
431			}
432
433			for (number = quarter_points * 4; number < num_points; number++) {
434			const float real = *complexVectorPtr++;
435			const float imag = *complexVectorPtr++;
436			magnitudeVectorPtr++ = sqrtf((real real) + (imag * imag));
437			}
438			}
439			#endif /* LV_HAVE_NEON */
440
441
442			#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
443