GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	112	112	100.0%
Functions:	5	5	100.0%
Branches:	18	18	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_8ic_s32f_deinterleave_real_32f
    
       *
    
       * \b Overview
    
       *
    
       * Deinterleaves the complex 8-bit char vector into just the real (I)
    
       * vector, converts the samples to floats, and divides the results by
    
       * the scalar factor.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_8ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_8sc_t* complexVector,
    
       * const float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li scalar: The scalar value used to divide the floating point results.
    
       * \li num_points: The number of complex data values to be deinterleaved.
    
       *
    
       * \b Outputs
    
       * \li iBuffer: The I buffer output data.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * volk_8ic_s32f_deinterleave_real_32f();
    
       *
    
       * volk_free(x);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
    
      #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      static inline void
    
      2
      volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
    
                                                 const lv_8sc_t* complexVector,
    
                                                 const float scalar,
    
                                                 unsigned int num_points)
    
      {
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int sixteenthPoints = num_points / 16;
    
          __m256 iFloatValue;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m256 invScalar = _mm256_set1_ps(iScalar);
    
          __m256i complexVal, iIntVal;
    
      2
          int8_t* complexVectorPtr = (int8_t*)complexVector;
    
      2
          __m256i moveMask = _mm256_set_epi8(0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             14,
    
                                             12,
    
                                             10,
    
                                             8,
    
                                             6,
    
                                             4,
    
                                             2,
    
                                             0,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             14,
    
                                             12,
    
                                             10,
    
                                             8,
    
                                             6,
    
                                             4,
    
                                             2,
    
                                             0);
    
        2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.

      16384
          for (; number < sixteenthPoints; number++) {
    
      16382
              complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
    
      16382
              complexVectorPtr += 32;
    
      16382
              complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
    
      32764
              iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
    
      16382
              iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    
      16382
              iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    
              _mm256_store_ps(iBufferPtr, iFloatValue);
    
      16382
              iBufferPtr += 8;
    
      16382
              complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
    
      32764
              iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
    
      16382
              iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    
      16382
              iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    
              _mm256_store_ps(iBufferPtr, iFloatValue);
    
      16382
              iBufferPtr += 8;
    
          }
    
      2
          number = sixteenthPoints * 16;
    
        2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.

      32
          for (; number < num_points; number++) {
    
      30
              *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    
      30
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE4_1
    
      #include <smmintrin.h>
    
      static inline void
    
      2
      volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
    
                                                   const lv_8sc_t* complexVector,
    
                                                   const float scalar,
    
                                                   unsigned int num_points)
    
      {
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
          __m128 iFloatValue;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m128 invScalar = _mm_set_ps1(iScalar);
    
          __m128i complexVal, iIntVal;
    
      2
          int8_t* complexVectorPtr = (int8_t*)complexVector;
    
      2
          __m128i moveMask = _mm_set_epi8(
    
              0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
    
      32766
              complexVectorPtr += 16;
    
      32766
              complexVal = _mm_shuffle_epi8(complexVal, moveMask);
    
      32766
              iIntVal = _mm_cvtepi8_epi32(complexVal);
    
      32766
              iFloatValue = _mm_cvtepi32_ps(iIntVal);
    
      32766
              iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
    
              _mm_store_ps(iBufferPtr, iFloatValue);
    
      32766
              iBufferPtr += 4;
    
      32766
              complexVal = _mm_srli_si128(complexVal, 4);
    
      32766
              iIntVal = _mm_cvtepi8_epi32(complexVal);
    
      32766
              iFloatValue = _mm_cvtepi32_ps(iIntVal);
    
      32766
              iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
    
              _mm_store_ps(iBufferPtr, iFloatValue);
    
      32766
              iBufferPtr += 4;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    
      14
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE4_1 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      static inline void
    
      2
      volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
    
                                                const lv_8sc_t* complexVector,
    
                                                const float scalar,
    
                                                unsigned int num_points)
    
      {
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 iValue;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m128 invScalar = _mm_set_ps1(iScalar);
    
      2
          int8_t* complexVectorPtr = (int8_t*)complexVector;
    
          __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              floatBuffer[0] = (float)(*complexVectorPtr);
    
      65534
              complexVectorPtr += 2;
    
      65534
              floatBuffer[1] = (float)(*complexVectorPtr);
    
      65534
              complexVectorPtr += 2;
    
      65534
              floatBuffer[2] = (float)(*complexVectorPtr);
    
      65534
              complexVectorPtr += 2;
    
      65534
              floatBuffer[3] = (float)(*complexVectorPtr);
    
      65534
              complexVectorPtr += 2;
    
      65534
              iValue = _mm_load_ps(floatBuffer);
    
      65534
              iValue = _mm_mul_ps(iValue, invScalar);
    
              _mm_store_ps(iBufferPtr, iValue);
    
      65534
              iBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    
      6
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      static inline void
    
      2
      volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer,
    
                                                  const lv_8sc_t* complexVector,
    
                                                  const float scalar,
    
                                                  unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const int8_t* complexVectorPtr = (const int8_t*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          const float invScalar = 1.0 / scalar;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
    
      262142
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
    
      #ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
    
      #define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      static inline void
    
      2
      volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
    
                                                 const lv_8sc_t* complexVector,
    
                                                 const float scalar,
    
                                                 unsigned int num_points)
    
      {
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int sixteenthPoints = num_points / 16;
    
          __m256 iFloatValue;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m256 invScalar = _mm256_set1_ps(iScalar);
    
          __m256i complexVal, iIntVal;
    
          __m128i hcomplexVal;
    
      2
          int8_t* complexVectorPtr = (int8_t*)complexVector;
    
      2
          __m256i moveMask = _mm256_set_epi8(0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             14,
    
                                             12,
    
                                             10,
    
                                             8,
    
                                             6,
    
                                             4,
    
                                             2,
    
                                             0,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             0x80,
    
                                             14,
    
                                             12,
    
                                             10,
    
                                             8,
    
                                             6,
    
                                             4,
    
                                             2,
    
                                             0);
    
        2/2✓ Branch 0 taken 16382 times.
✓ Branch 1 taken 2 times.

      16384
          for (; number < sixteenthPoints; number++) {
    
      16382
              complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
    
      16382
              complexVectorPtr += 32;
    
      16382
              complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
    
      16382
              hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
    
      16382
              iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
    
      16382
              iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    
      16382
              iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    
              _mm256_storeu_ps(iBufferPtr, iFloatValue);
    
      16382
              iBufferPtr += 8;
    
      16382
              hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
    
      16382
              iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
    
      16382
              iFloatValue = _mm256_cvtepi32_ps(iIntVal);
    
      16382
              iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
    
              _mm256_storeu_ps(iBufferPtr, iFloatValue);
    
      16382
              iBufferPtr += 8;
    
          }
    
      2
          number = sixteenthPoints * 16;
    
        2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 2 times.

      32
          for (; number < num_points; number++) {
    
      30
              *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
    
      30
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_8ic_s32f_deinterleave_real_32f
12			*
13			* \b Overview
14			*
15			* Deinterleaves the complex 8-bit char vector into just the real (I)
16			* vector, converts the samples to floats, and divides the results by
17			* the scalar factor.
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_8ic_s32f_deinterleave_real_32f(float* iBuffer, const lv_8sc_t* complexVector,
22			* const float scalar, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li complexVector: The complex input vector.
26			* \li scalar: The scalar value used to divide the floating point results.
27			* \li num_points: The number of complex data values to be deinterleaved.
28			*
29			* \b Outputs
30			* \li iBuffer: The I buffer output data.
31			*
32			* \b Example
33			* \code
34			* int N = 10000;
35			*
36			* volk_8ic_s32f_deinterleave_real_32f();
37			*
38			* volk_free(x);
39			* \endcode
40			*/
41
42			#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
43			#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
44
45			#include <inttypes.h>
46			#include <stdio.h>
47			#include <volk/volk_common.h>
48
49			#ifdef LV_HAVE_AVX2
50			#include <immintrin.h>
51
52			static inline void
53		2	volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
54			const lv_8sc_t* complexVector,
55			const float scalar,
56			unsigned int num_points)
57			{
58		2	float* iBufferPtr = iBuffer;
59
60		2	unsigned int number = 0;
61		2	const unsigned int sixteenthPoints = num_points / 16;
62			__m256 iFloatValue;
63
64		2	const float iScalar = 1.0 / scalar;
65		2	__m256 invScalar = _mm256_set1_ps(iScalar);
66			__m256i complexVal, iIntVal;
67		2	int8_t* complexVectorPtr = (int8_t*)complexVector;
68
69		2	__m256i moveMask = _mm256_set_epi8(0x80,
70			0x80,
71			0x80,
72			0x80,
73			0x80,
74			0x80,
75			0x80,
76			0x80,
77			14,
78			12,
79			10,
80			8,
81			6,
82			4,
83			2,
84			0,
85			0x80,
86			0x80,
87			0x80,
88			0x80,
89			0x80,
90			0x80,
91			0x80,
92			0x80,
93			14,
94			12,
95			10,
96			8,
97			6,
98			4,
99			2,
100			0);
101	2/2 ✓ Branch 0 taken 16382 times. ✓ Branch 1 taken 2 times.	16384	for (; number < sixteenthPoints; number++) {
102		16382	complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
103		16382	complexVectorPtr += 32;
104		16382	complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
105
106		32764	iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
107		16382	iFloatValue = _mm256_cvtepi32_ps(iIntVal);
108		16382	iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
109			_mm256_store_ps(iBufferPtr, iFloatValue);
110		16382	iBufferPtr += 8;
111
112		16382	complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
113		32764	iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
114		16382	iFloatValue = _mm256_cvtepi32_ps(iIntVal);
115		16382	iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
116			_mm256_store_ps(iBufferPtr, iFloatValue);
117		16382	iBufferPtr += 8;
118			}
119
120		2	number = sixteenthPoints * 16;
121	2/2 ✓ Branch 0 taken 30 times. ✓ Branch 1 taken 2 times.	32	for (; number < num_points; number++) {
122		30	*iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
123		30	complexVectorPtr++;
124			}
125		2	}
126			#endif /* LV_HAVE_AVX2 */
127
128
129			#ifdef LV_HAVE_SSE4_1
130			#include <smmintrin.h>
131
132			static inline void
133		2	volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
134			const lv_8sc_t* complexVector,
135			const float scalar,
136			unsigned int num_points)
137			{
138		2	float* iBufferPtr = iBuffer;
139
140		2	unsigned int number = 0;
141		2	const unsigned int eighthPoints = num_points / 8;
142			__m128 iFloatValue;
143
144		2	const float iScalar = 1.0 / scalar;
145		2	__m128 invScalar = _mm_set_ps1(iScalar);
146			__m128i complexVal, iIntVal;
147		2	int8_t* complexVectorPtr = (int8_t*)complexVector;
148
149		2	__m128i moveMask = _mm_set_epi8(
150			0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
151
152	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
153		32766	complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
154		32766	complexVectorPtr += 16;
155		32766	complexVal = _mm_shuffle_epi8(complexVal, moveMask);
156
157		32766	iIntVal = _mm_cvtepi8_epi32(complexVal);
158		32766	iFloatValue = _mm_cvtepi32_ps(iIntVal);
159
160		32766	iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
161
162			_mm_store_ps(iBufferPtr, iFloatValue);
163
164		32766	iBufferPtr += 4;
165
166		32766	complexVal = _mm_srli_si128(complexVal, 4);
167		32766	iIntVal = _mm_cvtepi8_epi32(complexVal);
168		32766	iFloatValue = _mm_cvtepi32_ps(iIntVal);
169
170		32766	iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
171
172			_mm_store_ps(iBufferPtr, iFloatValue);
173
174		32766	iBufferPtr += 4;
175			}
176
177		2	number = eighthPoints * 8;
178	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
179		14	*iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
180		14	complexVectorPtr++;
181			}
182		2	}
183			#endif /* LV_HAVE_SSE4_1 */
184
185
186			#ifdef LV_HAVE_SSE
187			#include <xmmintrin.h>
188
189			static inline void
190		2	volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer,
191			const lv_8sc_t* complexVector,
192			const float scalar,
193			unsigned int num_points)
194			{
195		2	float* iBufferPtr = iBuffer;
196
197		2	unsigned int number = 0;
198		2	const unsigned int quarterPoints = num_points / 4;
199			__m128 iValue;
200
201		2	const float iScalar = 1.0 / scalar;
202		2	__m128 invScalar = _mm_set_ps1(iScalar);
203		2	int8_t* complexVectorPtr = (int8_t*)complexVector;
204
205			__VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
206
207	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
208		65534	floatBuffer[0] = (float)(*complexVectorPtr);
209		65534	complexVectorPtr += 2;
210		65534	floatBuffer[1] = (float)(*complexVectorPtr);
211		65534	complexVectorPtr += 2;
212		65534	floatBuffer[2] = (float)(*complexVectorPtr);
213		65534	complexVectorPtr += 2;
214		65534	floatBuffer[3] = (float)(*complexVectorPtr);
215		65534	complexVectorPtr += 2;
216
217		65534	iValue = _mm_load_ps(floatBuffer);
218
219		65534	iValue = _mm_mul_ps(iValue, invScalar);
220
221			_mm_store_ps(iBufferPtr, iValue);
222
223		65534	iBufferPtr += 4;
224			}
225
226		2	number = quarterPoints * 4;
227	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
228		6	*iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
229		6	complexVectorPtr++;
230			}
231		2	}
232			#endif /* LV_HAVE_SSE */
233
234
235			#ifdef LV_HAVE_GENERIC
236
237			static inline void
238		2	volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer,
239			const lv_8sc_t* complexVector,
240			const float scalar,
241			unsigned int num_points)
242			{
243		2	unsigned int number = 0;
244		2	const int8_t* complexVectorPtr = (const int8_t*)complexVector;
245		2	float* iBufferPtr = iBuffer;
246		2	const float invScalar = 1.0 / scalar;
247	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
248		262142	*iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
249		262142	complexVectorPtr++;
250			}
251		2	}
252			#endif /* LV_HAVE_GENERIC */
253
254
255			#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
256
257			#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
258			#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
259
260			#include <inttypes.h>
261			#include <stdio.h>
262			#include <volk/volk_common.h>
263
264			#ifdef LV_HAVE_AVX2
265			#include <immintrin.h>
266
267			static inline void
268		2	volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
269			const lv_8sc_t* complexVector,
270			const float scalar,
271			unsigned int num_points)
272			{
273		2	float* iBufferPtr = iBuffer;
274
275		2	unsigned int number = 0;
276		2	const unsigned int sixteenthPoints = num_points / 16;
277			__m256 iFloatValue;
278
279		2	const float iScalar = 1.0 / scalar;
280		2	__m256 invScalar = _mm256_set1_ps(iScalar);
281			__m256i complexVal, iIntVal;
282			__m128i hcomplexVal;
283		2	int8_t* complexVectorPtr = (int8_t*)complexVector;
284
285		2	__m256i moveMask = _mm256_set_epi8(0x80,
286			0x80,
287			0x80,
288			0x80,
289			0x80,
290			0x80,
291			0x80,
292			0x80,
293			14,
294			12,
295			10,
296			8,
297			6,
298			4,
299			2,
300			0,
301			0x80,
302			0x80,
303			0x80,
304			0x80,
305			0x80,
306			0x80,
307			0x80,
308			0x80,
309			14,
310			12,
311			10,
312			8,
313			6,
314			4,
315			2,
316			0);
317
318	2/2 ✓ Branch 0 taken 16382 times. ✓ Branch 1 taken 2 times.	16384	for (; number < sixteenthPoints; number++) {
319		16382	complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
320		16382	complexVectorPtr += 32;
321		16382	complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
322
323		16382	hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
324		16382	iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
325		16382	iFloatValue = _mm256_cvtepi32_ps(iIntVal);
326
327		16382	iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
328
329			_mm256_storeu_ps(iBufferPtr, iFloatValue);
330
331		16382	iBufferPtr += 8;
332
333		16382	hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
334		16382	iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
335		16382	iFloatValue = _mm256_cvtepi32_ps(iIntVal);
336
337		16382	iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
338
339			_mm256_storeu_ps(iBufferPtr, iFloatValue);
340
341		16382	iBufferPtr += 8;
342			}
343
344		2	number = sixteenthPoints * 16;
345	2/2 ✓ Branch 0 taken 30 times. ✓ Branch 1 taken 2 times.	32	for (; number < num_points; number++) {
346		30	*iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
347		30	complexVectorPtr++;
348			}
349		2	}
350			#endif /* LV_HAVE_AVX2 */
351
352
353			#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
354