GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_16ic_s32f_magnitude_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	143	143	100.0%
Functions:	5	5	100.0%
Branches:	18	18	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_16ic_s32f_magnitude_32f
    
       *
    
       * \b Overview
    
       *
    
       * Computes the magnitude of the complexVector and stores the results
    
       * in the magnitudeVector as a scaled floating point number.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_16ic_s32f_magnitude_32f(float* magnitudeVector, const lv_16sc_t*
    
       * complexVector, const float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector of complex 16-bit shorts.
    
       * \li scalar: The value to be divided against each sample of the input complex vector.
    
       * \li num_points: The number of samples.
    
       *
    
       * \b Outputs
    
       * \li magnitudeVector: The magnitude of the complex values.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * volk_16ic_s32f_magnitude_32f();
    
       *
    
       * volk_free(x);
    
       * volk_free(t);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
    
      #define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_s32f_magnitude_32f_a_avx2(float* magnitudeVector,
    
                                                             const lv_16sc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      4
          __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
    
          __m256 cplxValue1, cplxValue2, result;
    
          __m256i int1, int2;
    
          __m128i short1, short2;
    
      2
          __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
    
      32766
              complexVectorPtr += 16;
    
      32766
              short1 = _mm256_extracti128_si256(int1, 0);
    
      32766
              short2 = _mm256_extracti128_si256(int1, 1);
    
      32766
              int1 = _mm256_cvtepi16_epi32(short1);
    
      32766
              int2 = _mm256_cvtepi16_epi32(short2);
    
      32766
              cplxValue1 = _mm256_cvtepi32_ps(int1);
    
      32766
              cplxValue2 = _mm256_cvtepi32_ps(int2);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      32766
              result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      32766
              result = _mm256_permutevar8x32_ps(result, idx);
    
      32766
              result = _mm256_sqrt_ps(result); // Square root the values
    
              _mm256_store_ps(magnitudeVectorPtr, result);
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          magnitudeVectorPtr = &magnitudeVector[number];
    
      2
          complexVectorPtr = (const int16_t*)&complexVector[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float val1Real = (float)(*complexVectorPtr++) / scalar;
    
      14
              float val1Imag = (float)(*complexVectorPtr++) / scalar;
    
      14
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE3
    
      #include <pmmintrin.h>
    
      2
      static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector,
    
                                                             const lv_16sc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          __m128 invScalar = _mm_set_ps1(1.0 / scalar);
    
          __m128 cplxValue1, cplxValue2, result;
    
          __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
    
      65534
              inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
    
      65534
              inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
    
      65534
              inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
    
      65534
              inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
    
      65534
              inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
    
      65534
              inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
    
      65534
              inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
    
      65534
              cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
    
      65534
              cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
    
      65534
              complexVectorPtr += 8;
    
      65534
              cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
    
      65534
              cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
    
      65534
              cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      65534
              cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      65534
              result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      65534
              result = _mm_sqrt_ps(result); // Square root the values
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          magnitudeVectorPtr = &magnitudeVector[number];
    
      2
          complexVectorPtr = (const int16_t*)&complexVector[number];
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = (float)(*complexVectorPtr++) / scalar;
    
      6
              float val1Imag = (float)(*complexVectorPtr++) / scalar;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE3 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector,
    
                                                            const lv_16sc_t* complexVector,
    
                                                            const float scalar,
    
                                                            unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m128 invScalar = _mm_set_ps1(iScalar);
    
          __m128 cplxValue1, cplxValue2, result, re, im;
    
          __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
    
      65534
              inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
    
      65534
              inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
    
      65534
              inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
    
      65534
              inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
    
      65534
              inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
    
      65534
              inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
    
      65534
              inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
    
      65534
              cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
    
      65534
              cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
    
      65534
              re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
    
      65534
              im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
    
      65534
              complexVectorPtr += 8;
    
      65534
              cplxValue1 = _mm_mul_ps(re, invScalar);
    
      65534
              cplxValue2 = _mm_mul_ps(im, invScalar);
    
      65534
              cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      65534
              cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      65534
              result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      65534
              result = _mm_sqrt_ps(result); // Square root the values
    
              _mm_store_ps(magnitudeVectorPtr, result);
    
      65534
              magnitudeVectorPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          magnitudeVectorPtr = &magnitudeVector[number];
    
      2
          complexVectorPtr = (const int16_t*)&complexVector[number];
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              float val1Real = (float)(*complexVectorPtr++) * iScalar;
    
      6
              float val1Imag = (float)(*complexVectorPtr++) * iScalar;
    
      6
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
    
                                                              const lv_16sc_t* complexVector,
    
                                                              const float scalar,
    
                                                              unsigned int num_points)
    
      {
    
      2
          const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      2
          unsigned int number = 0;
    
      2
          const float invScalar = 1.0 / scalar;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              float real = ((float)(*complexVectorPtr++)) * invScalar;
    
      262142
              float imag = ((float)(*complexVectorPtr++)) * invScalar;
    
      262142
              *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
    
      #ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
    
      #define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
    
      #include <inttypes.h>
    
      #include <math.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector,
    
                                                             const lv_16sc_t* complexVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const int16_t* complexVectorPtr = (const int16_t*)complexVector;
    
      2
          float* magnitudeVectorPtr = magnitudeVector;
    
      4
          __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
    
          __m256 cplxValue1, cplxValue2, result;
    
          __m256i int1, int2;
    
          __m128i short1, short2;
    
      2
          __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
    
      32766
              complexVectorPtr += 16;
    
      32766
              short1 = _mm256_extracti128_si256(int1, 0);
    
      32766
              short2 = _mm256_extracti128_si256(int1, 1);
    
      32766
              int1 = _mm256_cvtepi16_epi32(short1);
    
      32766
              int2 = _mm256_cvtepi16_epi32(short2);
    
      32766
              cplxValue1 = _mm256_cvtepi32_ps(int1);
    
      32766
              cplxValue2 = _mm256_cvtepi32_ps(int2);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
    
      32766
              result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
    
      32766
              result = _mm256_permutevar8x32_ps(result, idx);
    
      32766
              result = _mm256_sqrt_ps(result); // Square root the values
    
              _mm256_storeu_ps(magnitudeVectorPtr, result);
    
      32766
              magnitudeVectorPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          magnitudeVectorPtr = &magnitudeVector[number];
    
      2
          complexVectorPtr = (const int16_t*)&complexVector[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              float val1Real = (float)(*complexVectorPtr++) / scalar;
    
      14
              float val1Imag = (float)(*complexVectorPtr++) / scalar;
    
      14
              *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_16ic_s32f_magnitude_32f
12			*
13			* \b Overview
14			*
15			* Computes the magnitude of the complexVector and stores the results
16			* in the magnitudeVector as a scaled floating point number.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_16ic_s32f_magnitude_32f(float* magnitudeVector, const lv_16sc_t*
21			* complexVector, const float scalar, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector of complex 16-bit shorts.
25			* \li scalar: The value to be divided against each sample of the input complex vector.
26			* \li num_points: The number of samples.
27			*
28			* \b Outputs
29			* \li magnitudeVector: The magnitude of the complex values.
30			*
31			* \b Example
32			* \code
33			* int N = 10000;
34			*
35			* volk_16ic_s32f_magnitude_32f();
36			*
37			* volk_free(x);
38			* volk_free(t);
39			* \endcode
40			*/
41
42			#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
43			#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
44
45			#include <inttypes.h>
46			#include <math.h>
47			#include <stdio.h>
48			#include <volk/volk_common.h>
49
50			#ifdef LV_HAVE_AVX2
51			#include <immintrin.h>
52
53		2	static inline void volk_16ic_s32f_magnitude_32f_a_avx2(float* magnitudeVector,
54			const lv_16sc_t* complexVector,
55			const float scalar,
56			unsigned int num_points)
57			{
58		2	unsigned int number = 0;
59		2	const unsigned int eighthPoints = num_points / 8;
60
61		2	const int16_t* complexVectorPtr = (const int16_t*)complexVector;
62		2	float* magnitudeVectorPtr = magnitudeVector;
63
64		4	__m256 invScalar = _mm256_set1_ps(1.0 / scalar);
65
66			__m256 cplxValue1, cplxValue2, result;
67			__m256i int1, int2;
68			__m128i short1, short2;
69		2	__m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
70
71	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
72
73		32766	int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
74		32766	complexVectorPtr += 16;
75		32766	short1 = _mm256_extracti128_si256(int1, 0);
76		32766	short2 = _mm256_extracti128_si256(int1, 1);
77
78		32766	int1 = _mm256_cvtepi16_epi32(short1);
79		32766	int2 = _mm256_cvtepi16_epi32(short2);
80		32766	cplxValue1 = _mm256_cvtepi32_ps(int1);
81		32766	cplxValue2 = _mm256_cvtepi32_ps(int2);
82
83		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
84		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
85
86		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
87		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
88
89		32766	result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
90		32766	result = _mm256_permutevar8x32_ps(result, idx);
91
92		32766	result = _mm256_sqrt_ps(result); // Square root the values
93
94			_mm256_store_ps(magnitudeVectorPtr, result);
95
96		32766	magnitudeVectorPtr += 8;
97			}
98
99		2	number = eighthPoints * 8;
100		2	magnitudeVectorPtr = &magnitudeVector[number];
101		2	complexVectorPtr = (const int16_t*)&complexVector[number];
102	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
103		14	float val1Real = (float)(*complexVectorPtr++) / scalar;
104		14	float val1Imag = (float)(*complexVectorPtr++) / scalar;
105		14	*magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
106			}
107		2	}
108			#endif /* LV_HAVE_AVX2 */
109
110
111			#ifdef LV_HAVE_SSE3
112			#include <pmmintrin.h>
113
114		2	static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector,
115			const lv_16sc_t* complexVector,
116			const float scalar,
117			unsigned int num_points)
118			{
119		2	unsigned int number = 0;
120		2	const unsigned int quarterPoints = num_points / 4;
121
122		2	const int16_t* complexVectorPtr = (const int16_t*)complexVector;
123		2	float* magnitudeVectorPtr = magnitudeVector;
124
125		2	__m128 invScalar = _mm_set_ps1(1.0 / scalar);
126
127			__m128 cplxValue1, cplxValue2, result;
128
129			__VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
130
131	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
132
133		65534	inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
134		65534	inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
135		65534	inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
136		65534	inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
137
138		65534	inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
139		65534	inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
140		65534	inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
141		65534	inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
142
143		65534	cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
144		65534	cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
145
146		65534	complexVectorPtr += 8;
147
148		65534	cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
149		65534	cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
150
151		65534	cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
152		65534	cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
153
154		65534	result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
155
156		65534	result = _mm_sqrt_ps(result); // Square root the values
157
158			_mm_store_ps(magnitudeVectorPtr, result);
159
160		65534	magnitudeVectorPtr += 4;
161			}
162
163		2	number = quarterPoints * 4;
164		2	magnitudeVectorPtr = &magnitudeVector[number];
165		2	complexVectorPtr = (const int16_t*)&complexVector[number];
166	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
167		6	float val1Real = (float)(*complexVectorPtr++) / scalar;
168		6	float val1Imag = (float)(*complexVectorPtr++) / scalar;
169		6	*magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
170			}
171		2	}
172			#endif /* LV_HAVE_SSE3 */
173
174			#ifdef LV_HAVE_SSE
175			#include <xmmintrin.h>
176
177		2	static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector,
178			const lv_16sc_t* complexVector,
179			const float scalar,
180			unsigned int num_points)
181			{
182		2	unsigned int number = 0;
183		2	const unsigned int quarterPoints = num_points / 4;
184
185		2	const int16_t* complexVectorPtr = (const int16_t*)complexVector;
186		2	float* magnitudeVectorPtr = magnitudeVector;
187
188		2	const float iScalar = 1.0 / scalar;
189		2	__m128 invScalar = _mm_set_ps1(iScalar);
190
191			__m128 cplxValue1, cplxValue2, result, re, im;
192
193			__VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
194
195	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
196		65534	inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
197		65534	inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
198		65534	inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
199		65534	inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
200
201		65534	inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
202		65534	inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
203		65534	inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
204		65534	inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
205
206		65534	cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
207		65534	cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
208
209		65534	re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
210		65534	im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
211
212		65534	complexVectorPtr += 8;
213
214		65534	cplxValue1 = _mm_mul_ps(re, invScalar);
215		65534	cplxValue2 = _mm_mul_ps(im, invScalar);
216
217		65534	cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
218		65534	cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
219
220		65534	result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
221
222		65534	result = _mm_sqrt_ps(result); // Square root the values
223
224			_mm_store_ps(magnitudeVectorPtr, result);
225
226		65534	magnitudeVectorPtr += 4;
227			}
228
229		2	number = quarterPoints * 4;
230		2	magnitudeVectorPtr = &magnitudeVector[number];
231		2	complexVectorPtr = (const int16_t*)&complexVector[number];
232	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
233		6	float val1Real = (float)(*complexVectorPtr++) * iScalar;
234		6	float val1Imag = (float)(*complexVectorPtr++) * iScalar;
235		6	*magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
236			}
237		2	}
238
239
240			#endif /* LV_HAVE_SSE */
241
242			#ifdef LV_HAVE_GENERIC
243
244		2	static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
245			const lv_16sc_t* complexVector,
246			const float scalar,
247			unsigned int num_points)
248			{
249		2	const int16_t* complexVectorPtr = (const int16_t*)complexVector;
250		2	float* magnitudeVectorPtr = magnitudeVector;
251		2	unsigned int number = 0;
252		2	const float invScalar = 1.0 / scalar;
253	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
254		262142	float real = ((float)(*complexVectorPtr++)) * invScalar;
255		262142	float imag = ((float)(*complexVectorPtr++)) * invScalar;
256		262142	*magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
257			}
258		2	}
259			#endif /* LV_HAVE_GENERIC */
260
261
262			#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
263
264			#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
265			#define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
266
267			#include <inttypes.h>
268			#include <math.h>
269			#include <stdio.h>
270			#include <volk/volk_common.h>
271
272			#ifdef LV_HAVE_AVX2
273			#include <immintrin.h>
274
275		2	static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector,
276			const lv_16sc_t* complexVector,
277			const float scalar,
278			unsigned int num_points)
279			{
280		2	unsigned int number = 0;
281		2	const unsigned int eighthPoints = num_points / 8;
282
283		2	const int16_t* complexVectorPtr = (const int16_t*)complexVector;
284		2	float* magnitudeVectorPtr = magnitudeVector;
285
286		4	__m256 invScalar = _mm256_set1_ps(1.0 / scalar);
287
288			__m256 cplxValue1, cplxValue2, result;
289			__m256i int1, int2;
290			__m128i short1, short2;
291		2	__m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
292
293	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
294
295		32766	int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
296		32766	complexVectorPtr += 16;
297		32766	short1 = _mm256_extracti128_si256(int1, 0);
298		32766	short2 = _mm256_extracti128_si256(int1, 1);
299
300		32766	int1 = _mm256_cvtepi16_epi32(short1);
301		32766	int2 = _mm256_cvtepi16_epi32(short2);
302		32766	cplxValue1 = _mm256_cvtepi32_ps(int1);
303		32766	cplxValue2 = _mm256_cvtepi32_ps(int2);
304
305		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
306		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
307
308		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
309		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
310
311		32766	result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
312		32766	result = _mm256_permutevar8x32_ps(result, idx);
313
314		32766	result = _mm256_sqrt_ps(result); // Square root the values
315
316			_mm256_storeu_ps(magnitudeVectorPtr, result);
317
318		32766	magnitudeVectorPtr += 8;
319			}
320
321		2	number = eighthPoints * 8;
322		2	magnitudeVectorPtr = &magnitudeVector[number];
323		2	complexVectorPtr = (const int16_t*)&complexVector[number];
324	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
325		14	float val1Real = (float)(*complexVectorPtr++) / scalar;
326		14	float val1Imag = (float)(*complexVectorPtr++) / scalar;
327		14	*magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
328			}
329		2	}
330			#endif /* LV_HAVE_AVX2 */
331
332			#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */
333