GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_s32f_magnitude_16i.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	115	115	100.0%
Functions:	5	5	100.0%
Branches:	10	10	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_s32f_magnitude_16i
    
       *
    
       * \b Overview
    
       *
    
       * Calculates the magnitude of the complexVector and stores the
    
       * results in the magnitudeVector. The results are scaled and
    
       * converted into 16-bit shorts.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_s32f_magnitude_16i(int16_t* magnitudeVector, const lv_32fc_t*
    
       * complexVector, const float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of samples.
    
       *
    
       * \b Outputs
    
       * \li magnitudeVector: The output value as 16-bit shorts.
    
       *
    
       * \b Example
    
       * Generate points around the unit circle and map them to integers with
    
       * magnitude 50 to preserve smallest deltas.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
    
       *   float scale = 50.f;
    
       *
    
       *   for(unsigned int ii = 0; ii < N/2; ++ii){
    
       *       // Generate points around the unit circle
    
       *       float real = -4.f * ((float)ii / (float)N) + 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *       in[ii+N/2] = lv_cmake(-real, -imag);
    
       *   }
    
       *
    
       *   volk_32fc_s32f_magnitude_16i(out, in, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %i\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
    
      #define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_GENERIC
    
      10
      static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector,
    
                                                              const lv_32fc_t* complexVector,
    
                                                              const float scalar,
    
                                                              unsigned int num_points)
    
      {
    
      10
          const float* complexVectorPtr = (float*)complexVector;
    
      10
          int16_t* magnitudeVectorPtr = magnitudeVector;
    
      10
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262182 times.
✓ Branch 1 taken 10 times.

      262192
          for (number = 0; number < num_points; number++) {
    
      262182
              __VOLK_VOLATILE float real = *complexVectorPtr++;
    
      262182
              __VOLK_VOLATILE float imag = *complexVectorPtr++;
    
      262182
              real *= real;
    
      262182
              imag *= imag;
    
      262182
              *magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
    
          }
    
      10
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          int16_t* magnitudeVectorPtr = magnitudeVector;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
      2
          __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
    
          __m256 cplxValue1, cplxValue2, result;
    
          __m256i resultInt;
    
          __m128i resultShort;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      32766
              result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      32766
              result = _mm256_sqrt_ps(result);
    
      32766
              result = _mm256_mul_ps(result, vScalar);
    
      32766
              resultInt = _mm256_cvtps_epi32(result);
    
      32766
              resultInt = _mm256_packs_epi32(resultInt, resultInt);
    
      32766
              resultInt = _mm256_permutevar8x32_epi32(
    
                  resultInt, idx); // permute to compensate for shuffling in hadd and packs
    
      32766
              resultShort = _mm256_extracti128_si256(resultInt, 0);
    
              _mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          volk_32fc_s32f_magnitude_16i_generic(
    
      2
              magnitudeVector + number, complexVector + number, scalar, num_points - number);
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      2
      static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          int16_t* magnitudeVectorPtr = magnitudeVector;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 cplxValue1, cplxValue2, result;
    
          __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      65534
              cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      65534
              result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      65534
              result = _mm_sqrt_ps(result);
    
      65534
              result = _mm_mul_ps(result, vScalar);
    
              _mm_store_ps(floatBuffer, result);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          volk_32fc_s32f_magnitude_16i_generic(
    
      2
              magnitudeVector + number, complexVector + number, scalar, num_points - number);
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
    
                                                            const lv_32fc_t* complexVector,
    
                                                            const float scalar,
    
                                                            unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          int16_t* magnitudeVectorPtr = magnitudeVector;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 cplxValue1, cplxValue2, result;
    
          __m128 iValue, qValue;
    
          __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
              // Arrange in i1i2i3i4 format
    
      65534
              iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
              // Arrange in q1q2q3q4 format
    
      65534
              qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
    
      65534
              __VOLK_VOLATILE __m128 iValue2 =
    
      65534
                  _mm_mul_ps(iValue, iValue); // Square the I values
    
      65534
              __VOLK_VOLATILE __m128 qValue2 =
    
      65534
                  _mm_mul_ps(qValue, qValue); // Square the Q Values
    
      131068
              result = _mm_add_ps(iValue2, qValue2); // Add the I2 and Q2 values
    
      65534
              result = _mm_sqrt_ps(result);
    
      65534
              result = _mm_mul_ps(result, vScalar);
    
              _mm_store_ps(floatBuffer, result);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    
      65534
              *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          volk_32fc_s32f_magnitude_16i_generic(
    
      2
              magnitudeVector + number, complexVector + number, scalar, num_points - number);
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
    
      #ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
    
      #define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          int16_t* magnitudeVectorPtr = magnitudeVector;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
      2
          __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
    
          __m256 cplxValue1, cplxValue2, result;
    
          __m256i resultInt;
    
          __m128i resultShort;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      32766
              result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      32766
              result = _mm256_sqrt_ps(result);
    
      32766
              result = _mm256_mul_ps(result, vScalar);
    
      32766
              resultInt = _mm256_cvtps_epi32(result);
    
      32766
              resultInt = _mm256_packs_epi32(resultInt, resultInt);
    
      32766
              resultInt = _mm256_permutevar8x32_epi32(
    
                  resultInt, idx); // permute to compensate for shuffling in hadd and packs
    
      32766
              resultShort = _mm256_extracti128_si256(resultInt, 0);
    
              _mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          volk_32fc_s32f_magnitude_16i_generic(
    
      2
              magnitudeVector + number, complexVector + number, scalar, num_points - number);
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_s32f_magnitude_16i
12			*
13			* \b Overview
14			*
15			* Calculates the magnitude of the complexVector and stores the
16			* results in the magnitudeVector. The results are scaled and
17			* converted into 16-bit shorts.
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_32fc_s32f_magnitude_16i(int16_t* magnitudeVector, const lv_32fc_t*
22			* complexVector, const float scalar, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li complexVector: The complex input vector.
26			* \li num_points: The number of samples.
27			*
28			* \b Outputs
29			* \li magnitudeVector: The output value as 16-bit shorts.
30			*
31			* \b Example
32			* Generate points around the unit circle and map them to integers with
33			* magnitude 50 to preserve smallest deltas.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38			* int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
39			* float scale = 50.f;
40			*
41			* for(unsigned int ii = 0; ii < N/2; ++ii){
42			* // Generate points around the unit circle
43			* float real = -4.f * ((float)ii / (float)N) + 1.f;
44			* float imag = std::sqrt(1.f - real * real);
45			* in[ii] = lv_cmake(real, imag);
46			* in[ii+N/2] = lv_cmake(-real, -imag);
47			* }
48			*
49			* volk_32fc_s32f_magnitude_16i(out, in, scale, N);
50			*
51			* for(unsigned int ii = 0; ii < N; ++ii){
52			* printf("out[%u] = %i\n", ii, out[ii]);
53			* }
54			*
55			* volk_free(in);
56			* volk_free(out);
57			* \endcode
58			*/
59
60			#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
61			#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
62
63			#include <inttypes.h>
64			#include <math.h>
65			#include <stdio.h>
66			#include <volk/volk_common.h>
67
68			#ifdef LV_HAVE_GENERIC
69
70		10	static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* magnitudeVector,
71			const lv_32fc_t* complexVector,
72			const float scalar,
73			unsigned int num_points)
74			{
75		10	const float* complexVectorPtr = (float*)complexVector;
76		10	int16_t* magnitudeVectorPtr = magnitudeVector;
77		10	unsigned int number = 0;
78	2/2 ✓ Branch 0 taken 262182 times. ✓ Branch 1 taken 10 times.	262192	for (number = 0; number < num_points; number++) {
79		262182	__VOLK_VOLATILE float real = *complexVectorPtr++;
80		262182	__VOLK_VOLATILE float imag = *complexVectorPtr++;
81		262182	real *= real;
82		262182	imag *= imag;
83		262182	*magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
84			}
85		10	}
86			#endif /* LV_HAVE_GENERIC */
87
88			#ifdef LV_HAVE_AVX2
89			#include <immintrin.h>
90
91		2	static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
92			const lv_32fc_t* complexVector,
93			const float scalar,
94			unsigned int num_points)
95			{
96		2	unsigned int number = 0;
97		2	const unsigned int eighthPoints = num_points / 8;
98
99		2	const float* complexVectorPtr = (const float*)complexVector;
100		2	int16_t* magnitudeVectorPtr = magnitudeVector;
101
102		2	__m256 vScalar = _mm256_set1_ps(scalar);
103		2	__m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
104			__m256 cplxValue1, cplxValue2, result;
105			__m256i resultInt;
106			__m128i resultShort;
107
108	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
109		32766	cplxValue1 = _mm256_load_ps(complexVectorPtr);
110		32766	complexVectorPtr += 8;
111
112		32766	cplxValue2 = _mm256_load_ps(complexVectorPtr);
113		32766	complexVectorPtr += 8;
114
115		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
116		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
117
118		32766	result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
119
120		32766	result = _mm256_sqrt_ps(result);
121
122		32766	result = _mm256_mul_ps(result, vScalar);
123
124		32766	resultInt = _mm256_cvtps_epi32(result);
125		32766	resultInt = _mm256_packs_epi32(resultInt, resultInt);
126		32766	resultInt = _mm256_permutevar8x32_epi32(
127			resultInt, idx); // permute to compensate for shuffling in hadd and packs
128		32766	resultShort = _mm256_extracti128_si256(resultInt, 0);
129			_mm_store_si128((__m128i*)magnitudeVectorPtr, resultShort);
130		32766	magnitudeVectorPtr += 8;
131			}
132
133		2	number = eighthPoints * 8;
134		2	volk_32fc_s32f_magnitude_16i_generic(
135		2	magnitudeVector + number, complexVector + number, scalar, num_points - number);
136		2	}
137			#endif /* LV_HAVE_AVX2 */
138
139			#ifdef LV_HAVE_SSE3
140			#include <pmmintrin.h>
141
142		2	static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* magnitudeVector,
143			const lv_32fc_t* complexVector,
144			const float scalar,
145			unsigned int num_points)
146			{
147		2	unsigned int number = 0;
148		2	const unsigned int quarterPoints = num_points / 4;
149
150		2	const float* complexVectorPtr = (const float*)complexVector;
151		2	int16_t* magnitudeVectorPtr = magnitudeVector;
152
153		2	__m128 vScalar = _mm_set_ps1(scalar);
154
155			__m128 cplxValue1, cplxValue2, result;
156
157			__VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
158
159	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
160		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
161		65534	complexVectorPtr += 4;
162
163		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
164		65534	complexVectorPtr += 4;
165
166		65534	cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
167		65534	cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
168
169		65534	result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
170
171		65534	result = _mm_sqrt_ps(result);
172
173		65534	result = _mm_mul_ps(result, vScalar);
174
175			_mm_store_ps(floatBuffer, result);
176		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
177		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
178		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
179		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
180			}
181
182		2	number = quarterPoints * 4;
183		2	volk_32fc_s32f_magnitude_16i_generic(
184		2	magnitudeVector + number, complexVector + number, scalar, num_points - number);
185		2	}
186			#endif /* LV_HAVE_SSE3 */
187
188
189			#ifdef LV_HAVE_SSE
190			#include <xmmintrin.h>
191
192		2	static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* magnitudeVector,
193			const lv_32fc_t* complexVector,
194			const float scalar,
195			unsigned int num_points)
196			{
197		2	unsigned int number = 0;
198		2	const unsigned int quarterPoints = num_points / 4;
199
200		2	const float* complexVectorPtr = (const float*)complexVector;
201		2	int16_t* magnitudeVectorPtr = magnitudeVector;
202
203		2	__m128 vScalar = _mm_set_ps1(scalar);
204
205			__m128 cplxValue1, cplxValue2, result;
206			__m128 iValue, qValue;
207
208			__VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
209
210	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
211		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
212		65534	complexVectorPtr += 4;
213
214		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
215		65534	complexVectorPtr += 4;
216
217			// Arrange in i1i2i3i4 format
218		65534	iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
219			// Arrange in q1q2q3q4 format
220		65534	qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
221
222		65534	__VOLK_VOLATILE __m128 iValue2 =
223		65534	_mm_mul_ps(iValue, iValue); // Square the I values
224		65534	__VOLK_VOLATILE __m128 qValue2 =
225		65534	_mm_mul_ps(qValue, qValue); // Square the Q Values
226
227		131068	result = _mm_add_ps(iValue2, qValue2); // Add the I2 and Q2 values
228
229		65534	result = _mm_sqrt_ps(result);
230
231		65534	result = _mm_mul_ps(result, vScalar);
232
233			_mm_store_ps(floatBuffer, result);
234		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
235		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
236		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
237		65534	*magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
238			}
239
240		2	number = quarterPoints * 4;
241		2	volk_32fc_s32f_magnitude_16i_generic(
242		2	magnitudeVector + number, complexVector + number, scalar, num_points - number);
243		2	}
244			#endif /* LV_HAVE_SSE */
245
246
247			#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
248
249			#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
250			#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
251
252			#include <inttypes.h>
253			#include <math.h>
254			#include <stdio.h>
255			#include <volk/volk_common.h>
256
257			#ifdef LV_HAVE_AVX2
258			#include <immintrin.h>
259
260		2	static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
261			const lv_32fc_t* complexVector,
262			const float scalar,
263			unsigned int num_points)
264			{
265		2	unsigned int number = 0;
266		2	const unsigned int eighthPoints = num_points / 8;
267
268		2	const float* complexVectorPtr = (const float*)complexVector;
269		2	int16_t* magnitudeVectorPtr = magnitudeVector;
270
271		2	__m256 vScalar = _mm256_set1_ps(scalar);
272		2	__m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
273			__m256 cplxValue1, cplxValue2, result;
274			__m256i resultInt;
275			__m128i resultShort;
276
277	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
278		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
279		32766	complexVectorPtr += 8;
280
281		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
282		32766	complexVectorPtr += 8;
283
284		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
285		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
286
287		32766	result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
288
289		32766	result = _mm256_sqrt_ps(result);
290
291		32766	result = _mm256_mul_ps(result, vScalar);
292
293		32766	resultInt = _mm256_cvtps_epi32(result);
294		32766	resultInt = _mm256_packs_epi32(resultInt, resultInt);
295		32766	resultInt = _mm256_permutevar8x32_epi32(
296			resultInt, idx); // permute to compensate for shuffling in hadd and packs
297		32766	resultShort = _mm256_extracti128_si256(resultInt, 0);
298			_mm_storeu_si128((__m128i*)magnitudeVectorPtr, resultShort);
299		32766	magnitudeVectorPtr += 8;
300			}
301
302		2	number = eighthPoints * 8;
303		2	volk_32fc_s32f_magnitude_16i_generic(
304		2	magnitudeVector + number, complexVector + number, scalar, num_points - number);
305		2	}
306			#endif /* LV_HAVE_AVX2 */
307
308			#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_u_H */
309