GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_magnitude_squared_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	125	125	100.0%
Functions:	8	8	100.0%
Branches:	28	28	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_magnitude_squared_32f
    
       *
    
       * \b Overview
    
       *
    
       * Calculates the magnitude squared of the complexVector and stores
    
       * the results in the magnitudeVector.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_magnitude_squared_32f(float* magnitudeVector, const lv_32fc_t*
    
       * complexVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of samples.
    
       *
    
       * \b Outputs
    
       * \li magnitudeVector: The output value.
    
       *
    
       * \b Example
    
       * Calculate the magnitude squared of \f$x^2 + x\f$ for points around the unit circle.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N/2; ++ii){
    
       *       float real = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *       in[ii] = in[ii] * in[ii] + in[ii];
    
       *       in[N-ii-1] = lv_cmake(real, imag);
    
       *       in[N-ii-1] = in[N-ii-1] * in[N-ii-1] + in[N-ii-1];
    
       *   }
    
       *
    
       *   volk_32fc_magnitude_squared_32f(magnitude, in, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(magnitude);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
    
      #define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <volk/volk_avx_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m256 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
    
      32766
              result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
    
              _mm256_storeu_ps(magnitudeVectorPtr, result);
    
      32766
              complexVectorPtr += 16;
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float val1Real = *complexVectorPtr++;
    
      14
              float val1Imag = *complexVectorPtr++;
    
      14
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      #include <volk/volk_sse3_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
    
              _mm_storeu_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <volk/volk_sse_intrinsics.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
    
              _mm_storeu_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector,
    
                                                                 const lv_32fc_t* complexVector,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
    
      #ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
    
      #define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <volk/volk_avx_intrinsics.h>
    
      4
      static inline void volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      4
          unsigned int number = 0;
    
      4
          const unsigned int eighthPoints = num_points / 8;
    
      4
          const float* complexVectorPtr = (float*)complexVector;
    
      4
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m256 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 4 times.

      65536
          for (; number < eighthPoints; number++) {
    
      65532
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      65532
              complexVectorPtr += 8;
    
      65532
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      65532
              complexVectorPtr += 8;
    
      65532
              result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
    
              _mm256_store_ps(magnitudeVectorPtr, result);
    
      65532
              magnitudeVectorPtr += 8;
    
          }
    
      4
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 28 times.
✓ Branch 1 taken 4 times.

      32
          for (; number < num_points; number++) {
    
      28
              float val1Real = *complexVectorPtr++;
    
      28
              float val1Imag = *complexVectorPtr++;
    
      28
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      4
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      #include <volk/volk_sse3_intrinsics.h>
    
      2
      static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <volk/volk_sse_intrinsics.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
          __m128 cplxValue1, cplxValue2, result;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = *complexVectorPtr++;
    
      6
              float val1Imag = *complexVectorPtr++;
    
      6
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
    
                                                              const lv_32fc_t* complexVector,
    
                                                              unsigned int num_points)
    
      {
    
          unsigned int number = 0;
    
          const unsigned int quarterPoints = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          float* magnitudeVectorPtr = magnitudeVector;
    
          float32x4x2_t cmplx_val;
    
          float32x4_t result;
    
          for (; number < quarterPoints; number++) {
    
              cmplx_val = vld2q_f32(complexVectorPtr);
    
              complexVectorPtr += 8;
    
              cmplx_val.val[0] =
    
                  vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
    
              cmplx_val.val[1] =
    
                  vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
    
              result =
    
                  vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
    
              vst1q_f32(magnitudeVectorPtr, result);
    
              magnitudeVectorPtr += 4;
    
          }
    
          number = quarterPoints * 4;
    
          for (; number < num_points; number++) {
    
              float val1Real = *complexVectorPtr++;
    
              float val1Imag = *complexVectorPtr++;
    
              *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_magnitude_squared_32f_a_generic(
    
          float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const float real = *complexVectorPtr++;
    
      262142
              const float imag = *complexVectorPtr++;
    
      262142
              *magnitudeVectorPtr++ = (real * real) + (imag * imag);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_magnitude_squared_32f
12			*
13			* \b Overview
14			*
15			* Calculates the magnitude squared of the complexVector and stores
16			* the results in the magnitudeVector.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_magnitude_squared_32f(float* magnitudeVector, const lv_32fc_t*
21			* complexVector, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li num_points: The number of samples.
26			*
27			* \b Outputs
28			* \li magnitudeVector: The output value.
29			*
30			* \b Example
31			* Calculate the magnitude squared of \f$x^2 + x\f$ for points around the unit circle.
32			* \code
33			* int N = 10;
34			* unsigned int alignment = volk_get_alignment();
35			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
36			* float* magnitude = (float*)volk_malloc(sizeof(float)*N, alignment);
37			*
38			* for(unsigned int ii = 0; ii < N/2; ++ii){
39			* float real = 2.f * ((float)ii / (float)N) - 1.f;
40			* float imag = std::sqrt(1.f - real * real);
41			* in[ii] = lv_cmake(real, imag);
42			* in[ii] = in[ii] * in[ii] + in[ii];
43			* in[N-ii-1] = lv_cmake(real, imag);
44			* in[N-ii-1] = in[N-ii-1] * in[N-ii-1] + in[N-ii-1];
45			* }
46			*
47			* volk_32fc_magnitude_squared_32f(magnitude, in, N);
48			*
49			* for(unsigned int ii = 0; ii < N; ++ii){
50			* printf("out(%i) = %+.1f\n", ii, magnitude[ii]);
51			* }
52			*
53			* volk_free(in);
54			* volk_free(magnitude);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
59			#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
60
61			#include <inttypes.h>
62			#include <math.h>
63			#include <stdio.h>
64
65			#ifdef LV_HAVE_AVX
66			#include <immintrin.h>
67			#include <volk/volk_avx_intrinsics.h>
68
69		2	static inline void volk_32fc_magnitude_squared_32f_u_avx(float* magnitudeVector,
70			const lv_32fc_t* complexVector,
71			unsigned int num_points)
72			{
73		2	unsigned int number = 0;
74		2	const unsigned int eighthPoints = num_points / 8;
75
76		2	const float* complexVectorPtr = (float*)complexVector;
77		2	float* magnitudeVectorPtr = magnitudeVector;
78
79			__m256 cplxValue1, cplxValue2, result;
80
81	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
82		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
83		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
84		32766	result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
85			_mm256_storeu_ps(magnitudeVectorPtr, result);
86
87		32766	complexVectorPtr += 16;
88		32766	magnitudeVectorPtr += 8;
89			}
90
91		2	number = eighthPoints * 8;
92	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
93		14	float val1Real = *complexVectorPtr++;
94		14	float val1Imag = *complexVectorPtr++;
95		14	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
96			}
97		2	}
98			#endif /* LV_HAVE_AVX */
99
100
101			#ifdef LV_HAVE_SSE3
102			#include <pmmintrin.h>
103			#include <volk/volk_sse3_intrinsics.h>
104
105		2	static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* magnitudeVector,
106			const lv_32fc_t* complexVector,
107			unsigned int num_points)
108			{
109		2	unsigned int number = 0;
110		2	const unsigned int quarterPoints = num_points / 4;
111
112		2	const float* complexVectorPtr = (float*)complexVector;
113		2	float* magnitudeVectorPtr = magnitudeVector;
114
115			__m128 cplxValue1, cplxValue2, result;
116	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
117		65534	cplxValue1 = _mm_loadu_ps(complexVectorPtr);
118		65534	complexVectorPtr += 4;
119
120		65534	cplxValue2 = _mm_loadu_ps(complexVectorPtr);
121		65534	complexVectorPtr += 4;
122
123		65534	result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
124			_mm_storeu_ps(magnitudeVectorPtr, result);
125		65534	magnitudeVectorPtr += 4;
126			}
127
128		2	number = quarterPoints * 4;
129	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
130		6	float val1Real = *complexVectorPtr++;
131		6	float val1Imag = *complexVectorPtr++;
132		6	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
133			}
134		2	}
135			#endif /* LV_HAVE_SSE3 */
136
137
138			#ifdef LV_HAVE_SSE
139			#include <volk/volk_sse_intrinsics.h>
140			#include <xmmintrin.h>
141
142		2	static inline void volk_32fc_magnitude_squared_32f_u_sse(float* magnitudeVector,
143			const lv_32fc_t* complexVector,
144			unsigned int num_points)
145			{
146		2	unsigned int number = 0;
147		2	const unsigned int quarterPoints = num_points / 4;
148
149		2	const float* complexVectorPtr = (float*)complexVector;
150		2	float* magnitudeVectorPtr = magnitudeVector;
151
152			__m128 cplxValue1, cplxValue2, result;
153
154	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
155		65534	cplxValue1 = _mm_loadu_ps(complexVectorPtr);
156		65534	complexVectorPtr += 4;
157
158		65534	cplxValue2 = _mm_loadu_ps(complexVectorPtr);
159		65534	complexVectorPtr += 4;
160
161		65534	result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
162			_mm_storeu_ps(magnitudeVectorPtr, result);
163		65534	magnitudeVectorPtr += 4;
164			}
165
166		2	number = quarterPoints * 4;
167	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
168		6	float val1Real = *complexVectorPtr++;
169		6	float val1Imag = *complexVectorPtr++;
170		6	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
171			}
172		2	}
173			#endif /* LV_HAVE_SSE */
174
175
176			#ifdef LV_HAVE_GENERIC
177
178		2	static inline void volk_32fc_magnitude_squared_32f_generic(float* magnitudeVector,
179			const lv_32fc_t* complexVector,
180			unsigned int num_points)
181			{
182		2	const float* complexVectorPtr = (float*)complexVector;
183		2	float* magnitudeVectorPtr = magnitudeVector;
184		2	unsigned int number = 0;
185	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
186		262142	const float real = *complexVectorPtr++;
187		262142	const float imag = *complexVectorPtr++;
188		262142	*magnitudeVectorPtr++ = (real * real) + (imag * imag);
189			}
190		2	}
191			#endif /* LV_HAVE_GENERIC */
192
193
194			#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_u_H */
195			#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
196			#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
197
198			#include <inttypes.h>
199			#include <math.h>
200			#include <stdio.h>
201
202			#ifdef LV_HAVE_AVX
203			#include <immintrin.h>
204			#include <volk/volk_avx_intrinsics.h>
205
206		4	static inline void volk_32fc_magnitude_squared_32f_a_avx(float* magnitudeVector,
207			const lv_32fc_t* complexVector,
208			unsigned int num_points)
209			{
210		4	unsigned int number = 0;
211		4	const unsigned int eighthPoints = num_points / 8;
212
213		4	const float* complexVectorPtr = (float*)complexVector;
214		4	float* magnitudeVectorPtr = magnitudeVector;
215
216			__m256 cplxValue1, cplxValue2, result;
217	2/2 ✓ Branch 0 taken 65532 times. ✓ Branch 1 taken 4 times.	65536	for (; number < eighthPoints; number++) {
218		65532	cplxValue1 = _mm256_load_ps(complexVectorPtr);
219		65532	complexVectorPtr += 8;
220
221		65532	cplxValue2 = _mm256_load_ps(complexVectorPtr);
222		65532	complexVectorPtr += 8;
223
224		65532	result = _mm256_magnitudesquared_ps(cplxValue1, cplxValue2);
225			_mm256_store_ps(magnitudeVectorPtr, result);
226		65532	magnitudeVectorPtr += 8;
227			}
228
229		4	number = eighthPoints * 8;
230	2/2 ✓ Branch 0 taken 28 times. ✓ Branch 1 taken 4 times.	32	for (; number < num_points; number++) {
231		28	float val1Real = *complexVectorPtr++;
232		28	float val1Imag = *complexVectorPtr++;
233		28	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
234			}
235		4	}
236			#endif /* LV_HAVE_AVX */
237
238
239			#ifdef LV_HAVE_SSE3
240			#include <pmmintrin.h>
241			#include <volk/volk_sse3_intrinsics.h>
242
243		2	static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* magnitudeVector,
244			const lv_32fc_t* complexVector,
245			unsigned int num_points)
246			{
247		2	unsigned int number = 0;
248		2	const unsigned int quarterPoints = num_points / 4;
249
250		2	const float* complexVectorPtr = (float*)complexVector;
251		2	float* magnitudeVectorPtr = magnitudeVector;
252
253			__m128 cplxValue1, cplxValue2, result;
254	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
255		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
256		65534	complexVectorPtr += 4;
257
258		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
259		65534	complexVectorPtr += 4;
260
261		65534	result = _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2);
262			_mm_store_ps(magnitudeVectorPtr, result);
263		65534	magnitudeVectorPtr += 4;
264			}
265
266		2	number = quarterPoints * 4;
267	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
268		6	float val1Real = *complexVectorPtr++;
269		6	float val1Imag = *complexVectorPtr++;
270		6	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
271			}
272		2	}
273			#endif /* LV_HAVE_SSE3 */
274
275
276			#ifdef LV_HAVE_SSE
277			#include <volk/volk_sse_intrinsics.h>
278			#include <xmmintrin.h>
279
280		2	static inline void volk_32fc_magnitude_squared_32f_a_sse(float* magnitudeVector,
281			const lv_32fc_t* complexVector,
282			unsigned int num_points)
283			{
284		2	unsigned int number = 0;
285		2	const unsigned int quarterPoints = num_points / 4;
286
287		2	const float* complexVectorPtr = (float*)complexVector;
288		2	float* magnitudeVectorPtr = magnitudeVector;
289
290			__m128 cplxValue1, cplxValue2, result;
291	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
292		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
293		65534	complexVectorPtr += 4;
294
295		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
296		65534	complexVectorPtr += 4;
297
298		65534	result = _mm_magnitudesquared_ps(cplxValue1, cplxValue2);
299			_mm_store_ps(magnitudeVectorPtr, result);
300		65534	magnitudeVectorPtr += 4;
301			}
302
303		2	number = quarterPoints * 4;
304	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
305		6	float val1Real = *complexVectorPtr++;
306		6	float val1Imag = *complexVectorPtr++;
307		6	*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
308			}
309		2	}
310			#endif /* LV_HAVE_SSE */
311
312
313			#ifdef LV_HAVE_NEON
314			#include <arm_neon.h>
315
316			static inline void volk_32fc_magnitude_squared_32f_neon(float* magnitudeVector,
317			const lv_32fc_t* complexVector,
318			unsigned int num_points)
319			{
320			unsigned int number = 0;
321			const unsigned int quarterPoints = num_points / 4;
322
323			const float* complexVectorPtr = (float*)complexVector;
324			float* magnitudeVectorPtr = magnitudeVector;
325
326			float32x4x2_t cmplx_val;
327			float32x4_t result;
328			for (; number < quarterPoints; number++) {
329			cmplx_val = vld2q_f32(complexVectorPtr);
330			complexVectorPtr += 8;
331
332			cmplx_val.val[0] =
333			vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // Square the values
334			cmplx_val.val[1] =
335			vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // Square the values
336
337			result =
338			vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 and Q2 values
339
340			vst1q_f32(magnitudeVectorPtr, result);
341			magnitudeVectorPtr += 4;
342			}
343
344			number = quarterPoints * 4;
345			for (; number < num_points; number++) {
346			float val1Real = *complexVectorPtr++;
347			float val1Imag = *complexVectorPtr++;
348			*magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
349			}
350			}
351			#endif /* LV_HAVE_NEON */
352
353
354			#ifdef LV_HAVE_GENERIC
355
356		2	static inline void volk_32fc_magnitude_squared_32f_a_generic(
357			float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
358			{
359		2	const float* complexVectorPtr = (float*)complexVector;
360		2	float* magnitudeVectorPtr = magnitudeVector;
361		2	unsigned int number = 0;
362	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
363		262142	const float real = *complexVectorPtr++;
364		262142	const float imag = *complexVectorPtr++;
365		262142	*magnitudeVectorPtr++ = (real * real) + (imag * imag);
366			}
367		2	}
368			#endif /* LV_HAVE_GENERIC */
369
370			#endif /* INCLUDED_volk_32fc_magnitude_squared_32f_a_H */
371