GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	83	83	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_s32f_deinterleave_real_16i
    
       *
    
       * \b Overview
    
       *
    
       * Deinterleaves the complex floating point vector and return the real
    
       * part (inphase) of the samples scaled to 16-bit shorts.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_s32f_deinterleave_real_16i(int16_t* iBuffer, const lv_32fc_t*
    
       * complexVector, const float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li scalar: The value to be multiplied against each of the input vectors..
    
       * \li num_points: The number of complex data values to be deinterleaved.
    
       *
    
       * \b Outputs
    
       * \li iBuffer: The I buffer output data.
    
       *
    
       * \b Example
    
       * Generate points around the unit circle and map them to integers with
    
       * magnitude 50 to preserve smallest deltas.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
    
       *   float scale = 50.f;
    
       *
    
       *   for(unsigned int ii = 0; ii < N/2; ++ii){
    
       *       // Generate points around the unit circle
    
       *       float real = -4.f * ((float)ii / (float)N) + 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *       in[ii+N/2] = lv_cmake(-real, -imag);
    
       *   }
    
       *
    
       *   volk_32fc_s32f_deinterleave_real_16i(out, in, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %i\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
    
      #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      static inline void
    
      2
      volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
    
                                                  const lv_32fc_t* complexVector,
    
                                                  const float scalar,
    
                                                  unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          int16_t* iBufferPtr = iBuffer;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
          __m256 cplxValue1, cplxValue2, iValue;
    
          __m256i a;
    
          __m128i b;
    
      2
          __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
      32766
              iValue = _mm256_mul_ps(iValue, vScalar);
    
      32766
              iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
    
      32766
              a = _mm256_cvtps_epi32(iValue);
    
      32766
              a = _mm256_packs_epi32(a, a);
    
      32766
              a = _mm256_permutevar8x32_epi32(a, idx);
    
      32766
              b = _mm256_extracti128_si256(a, 0);
    
              _mm_store_si128((__m128i*)iBufferPtr, b);
    
      32766
              iBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          iBufferPtr = &iBuffer[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
    
      14
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      static inline void
    
      2
      volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer,
    
                                                 const lv_32fc_t* complexVector,
    
                                                 const float scalar,
    
                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          int16_t* iBufferPtr = iBuffer;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
          __m128 cplxValue1, cplxValue2, iValue;
    
          __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
              // Arrange in i1i2i3i4 format
    
      65534
              iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
      65534
              iValue = _mm_mul_ps(iValue, vScalar);
    
              _mm_store_ps(floatBuffer, iValue);
    
      65534
              *iBufferPtr++ = (int16_t)(floatBuffer[0]);
    
      65534
              *iBufferPtr++ = (int16_t)(floatBuffer[1]);
    
      65534
              *iBufferPtr++ = (int16_t)(floatBuffer[2]);
    
      65534
              *iBufferPtr++ = (int16_t)(floatBuffer[3]);
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          iBufferPtr = &iBuffer[number];
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
    
      6
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      static inline void
    
      2
      volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer,
    
                                                   const lv_32fc_t* complexVector,
    
                                                   const float scalar,
    
                                                   unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          int16_t* iBufferPtr = iBuffer;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
    
      262142
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
    
      #ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
    
      #define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      static inline void
    
      2
      volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
    
                                                  const lv_32fc_t* complexVector,
    
                                                  const float scalar,
    
                                                  unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          int16_t* iBufferPtr = iBuffer;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
          __m256 cplxValue1, cplxValue2, iValue;
    
          __m256i a;
    
          __m128i b;
    
      2
          __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
      32766
              iValue = _mm256_mul_ps(iValue, vScalar);
    
      32766
              iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
    
      32766
              a = _mm256_cvtps_epi32(iValue);
    
      32766
              a = _mm256_packs_epi32(a, a);
    
      32766
              a = _mm256_permutevar8x32_epi32(a, idx);
    
      32766
              b = _mm256_extracti128_si256(a, 0);
    
              _mm_storeu_si128((__m128i*)iBufferPtr, b);
    
      32766
              iBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          iBufferPtr = &iBuffer[number];
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
    
      14
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_s32f_deinterleave_real_16i
12			*
13			* \b Overview
14			*
15			* Deinterleaves the complex floating point vector and return the real
16			* part (inphase) of the samples scaled to 16-bit shorts.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_s32f_deinterleave_real_16i(int16_t* iBuffer, const lv_32fc_t*
21			* complexVector, const float scalar, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li scalar: The value to be multiplied against each of the input vectors..
26			* \li num_points: The number of complex data values to be deinterleaved.
27			*
28			* \b Outputs
29			* \li iBuffer: The I buffer output data.
30			*
31			* \b Example
32			* Generate points around the unit circle and map them to integers with
33			* magnitude 50 to preserve smallest deltas.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38			* int16_t* out = (int16_t*)volk_malloc(sizeof(int16_t)*N, alignment);
39			* float scale = 50.f;
40			*
41			* for(unsigned int ii = 0; ii < N/2; ++ii){
42			* // Generate points around the unit circle
43			* float real = -4.f * ((float)ii / (float)N) + 1.f;
44			* float imag = std::sqrt(1.f - real * real);
45			* in[ii] = lv_cmake(real, imag);
46			* in[ii+N/2] = lv_cmake(-real, -imag);
47			* }
48			*
49			* volk_32fc_s32f_deinterleave_real_16i(out, in, scale, N);
50			*
51			* for(unsigned int ii = 0; ii < N; ++ii){
52			* printf("out[%u] = %i\n", ii, out[ii]);
53			* }
54			*
55			* volk_free(in);
56			* volk_free(out);
57			* \endcode
58			*/
59
60			#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61			#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
62
63			#include <inttypes.h>
64			#include <stdio.h>
65			#include <volk/volk_common.h>
66
67
68			#ifdef LV_HAVE_AVX2
69			#include <immintrin.h>
70
71			static inline void
72		2	volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
73			const lv_32fc_t* complexVector,
74			const float scalar,
75			unsigned int num_points)
76			{
77		2	unsigned int number = 0;
78		2	const unsigned int eighthPoints = num_points / 8;
79
80		2	const float* complexVectorPtr = (float*)complexVector;
81		2	int16_t* iBufferPtr = iBuffer;
82
83		2	__m256 vScalar = _mm256_set1_ps(scalar);
84
85			__m256 cplxValue1, cplxValue2, iValue;
86			__m256i a;
87			__m128i b;
88
89		2	__m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
90
91	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
92		32766	cplxValue1 = _mm256_load_ps(complexVectorPtr);
93		32766	complexVectorPtr += 8;
94
95		32766	cplxValue2 = _mm256_load_ps(complexVectorPtr);
96		32766	complexVectorPtr += 8;
97
98			// Arrange in i1i2i3i4 format
99		32766	iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
100
101		32766	iValue = _mm256_mul_ps(iValue, vScalar);
102
103		32766	iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
104		32766	a = _mm256_cvtps_epi32(iValue);
105		32766	a = _mm256_packs_epi32(a, a);
106		32766	a = _mm256_permutevar8x32_epi32(a, idx);
107		32766	b = _mm256_extracti128_si256(a, 0);
108
109			_mm_store_si128((__m128i*)iBufferPtr, b);
110		32766	iBufferPtr += 8;
111			}
112
113		2	number = eighthPoints * 8;
114		2	iBufferPtr = &iBuffer[number];
115	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
116		14	*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
117		14	complexVectorPtr++;
118			}
119		2	}
120
121
122			#endif /* LV_HAVE_AVX2 */
123
124			#ifdef LV_HAVE_SSE
125			#include <xmmintrin.h>
126
127			static inline void
128		2	volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* iBuffer,
129			const lv_32fc_t* complexVector,
130			const float scalar,
131			unsigned int num_points)
132			{
133		2	unsigned int number = 0;
134		2	const unsigned int quarterPoints = num_points / 4;
135
136		2	const float* complexVectorPtr = (float*)complexVector;
137		2	int16_t* iBufferPtr = iBuffer;
138
139		2	__m128 vScalar = _mm_set_ps1(scalar);
140
141			__m128 cplxValue1, cplxValue2, iValue;
142
143			__VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
144
145	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
146		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
147		65534	complexVectorPtr += 4;
148
149		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
150		65534	complexVectorPtr += 4;
151
152			// Arrange in i1i2i3i4 format
153		65534	iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
154
155		65534	iValue = _mm_mul_ps(iValue, vScalar);
156
157			_mm_store_ps(floatBuffer, iValue);
158		65534	*iBufferPtr++ = (int16_t)(floatBuffer[0]);
159		65534	*iBufferPtr++ = (int16_t)(floatBuffer[1]);
160		65534	*iBufferPtr++ = (int16_t)(floatBuffer[2]);
161		65534	*iBufferPtr++ = (int16_t)(floatBuffer[3]);
162			}
163
164		2	number = quarterPoints * 4;
165		2	iBufferPtr = &iBuffer[number];
166	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
167		6	*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
168		6	complexVectorPtr++;
169			}
170		2	}
171
172			#endif /* LV_HAVE_SSE */
173
174
175			#ifdef LV_HAVE_GENERIC
176
177			static inline void
178		2	volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* iBuffer,
179			const lv_32fc_t* complexVector,
180			const float scalar,
181			unsigned int num_points)
182			{
183		2	const float* complexVectorPtr = (float*)complexVector;
184		2	int16_t* iBufferPtr = iBuffer;
185		2	unsigned int number = 0;
186	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
187		262142	*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
188		262142	complexVectorPtr++;
189			}
190		2	}
191
192			#endif /* LV_HAVE_GENERIC */
193
194			#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
195
196			#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
197			#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
198
199			#include <inttypes.h>
200			#include <stdio.h>
201			#include <volk/volk_common.h>
202
203			#ifdef LV_HAVE_AVX2
204			#include <immintrin.h>
205
206			static inline void
207		2	volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
208			const lv_32fc_t* complexVector,
209			const float scalar,
210			unsigned int num_points)
211			{
212		2	unsigned int number = 0;
213		2	const unsigned int eighthPoints = num_points / 8;
214
215		2	const float* complexVectorPtr = (float*)complexVector;
216		2	int16_t* iBufferPtr = iBuffer;
217
218		2	__m256 vScalar = _mm256_set1_ps(scalar);
219
220			__m256 cplxValue1, cplxValue2, iValue;
221			__m256i a;
222			__m128i b;
223
224		2	__m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
225
226	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
227		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
228		32766	complexVectorPtr += 8;
229
230		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
231		32766	complexVectorPtr += 8;
232
233			// Arrange in i1i2i3i4 format
234		32766	iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
235
236		32766	iValue = _mm256_mul_ps(iValue, vScalar);
237
238		32766	iValue = _mm256_round_ps(iValue, _MM_FROUND_TO_ZERO);
239		32766	a = _mm256_cvtps_epi32(iValue);
240		32766	a = _mm256_packs_epi32(a, a);
241		32766	a = _mm256_permutevar8x32_epi32(a, idx);
242		32766	b = _mm256_extracti128_si256(a, 0);
243
244			_mm_storeu_si128((__m128i*)iBufferPtr, b);
245		32766	iBufferPtr += 8;
246			}
247
248		2	number = eighthPoints * 8;
249		2	iBufferPtr = &iBuffer[number];
250	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
251		14	*iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
252		14	complexVectorPtr++;
253			}
254		2	}
255
256			#endif /* LV_HAVE_AVX2 */
257
258			#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H */
259