GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_deinterleave_32f_x2.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	72	72	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_deinterleave_32f_x2
    
       *
    
       * \b Overview
    
       *
    
       * Deinterleaves the complex floating point vector into I & Q vector
    
       * data.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_deinterleave_32f_x2(float* iBuffer, float* qBuffer, const lv_32fc_t*
    
       * complexVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of complex data values to be deinterleaved.
    
       *
    
       * \b Outputs
    
       * \li iBuffer: The I buffer output data.
    
       * \li qBuffer: The Q buffer output data.
    
       *
    
       * \b Example
    
       * Generate complex numbers around the top half of the unit circle and
    
       * deinterleave in to real and imaginary buffers.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* im = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       float real = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *   }
    
       *
    
       *   volk_32fc_deinterleave_32f_x2(re, im, in, N);
    
       *
    
       *   printf("          re  | im\n");
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out(%i) = %+.1f | %+.1f\n", ii, re[ii], im[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(re);
    
       *   volk_free(im);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
    
      #define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
    
                                                             float* qBuffer,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          float* qBufferPtr = qBuffer;
    
      2
          unsigned int number = 0;
    
          // Mask for real and imaginary parts
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
          __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
    
      32766
              complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
    
              // Arrange in q1q2q3q4 format
    
      32766
              qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
    
              _mm256_store_ps(iBufferPtr, iValue);
    
              _mm256_store_ps(qBufferPtr, qValue);
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = *complexVectorPtr++;
    
      14
              *qBufferPtr++ = *complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
    
                                                             float* qBuffer,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          float* qBufferPtr = qBuffer;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 cplxValue1, cplxValue2, iValue, qValue;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
              // Arrange in i1i2i3i4 format
    
      65534
              iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
              // Arrange in q1q2q3q4 format
    
      65534
              qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
    
              _mm_store_ps(iBufferPtr, iValue);
    
              _mm_store_ps(qBufferPtr, qValue);
    
      65534
              iBufferPtr += 4;
    
      65534
              qBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = *complexVectorPtr++;
    
      6
              *qBufferPtr++ = *complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
    
                                                            float* qBuffer,
    
                                                            const lv_32fc_t* complexVector,
    
                                                            unsigned int num_points)
    
      {
    
          unsigned int number = 0;
    
          unsigned int quarter_points = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          float* iBufferPtr = iBuffer;
    
          float* qBufferPtr = qBuffer;
    
          float32x4x2_t complexInput;
    
          for (number = 0; number < quarter_points; number++) {
    
              complexInput = vld2q_f32(complexVectorPtr);
    
              vst1q_f32(iBufferPtr, complexInput.val[0]);
    
              vst1q_f32(qBufferPtr, complexInput.val[1]);
    
              complexVectorPtr += 8;
    
              iBufferPtr += 4;
    
              qBufferPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              *iBufferPtr++ = *complexVectorPtr++;
    
              *qBufferPtr++ = *complexVectorPtr++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
    
                                                               float* qBuffer,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          float* qBufferPtr = qBuffer;
    
          unsigned int number;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *iBufferPtr++ = *complexVectorPtr++;
    
      262142
              *qBufferPtr++ = *complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
    
      #define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
    
                                                             float* qBuffer,
    
                                                             const lv_32fc_t* complexVector,
    
                                                             unsigned int num_points)
    
      {
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
      2
          float* qBufferPtr = qBuffer;
    
      2
          unsigned int number = 0;
    
          // Mask for real and imaginary parts
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
          __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
    
      32766
              complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
    
              // Arrange in q1q2q3q4 format
    
      32766
              qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
    
              _mm256_storeu_ps(iBufferPtr, iValue);
    
              _mm256_storeu_ps(qBufferPtr, qValue);
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = *complexVectorPtr++;
    
      14
              *qBufferPtr++ = *complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_deinterleave_32f_x2
12			*
13			* \b Overview
14			*
15			* Deinterleaves the complex floating point vector into I & Q vector
16			* data.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_deinterleave_32f_x2(float* iBuffer, float* qBuffer, const lv_32fc_t*
21			* complexVector, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li num_points: The number of complex data values to be deinterleaved.
26			*
27			* \b Outputs
28			* \li iBuffer: The I buffer output data.
29			* \li qBuffer: The Q buffer output data.
30			*
31			* \b Example
32			* Generate complex numbers around the top half of the unit circle and
33			* deinterleave in to real and imaginary buffers.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38			* float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
39			* float* im = (float*)volk_malloc(sizeof(float)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* float real = 2.f * ((float)ii / (float)N) - 1.f;
43			* float imag = std::sqrt(1.f - real * real);
44			* in[ii] = lv_cmake(real, imag);
45			* }
46			*
47			* volk_32fc_deinterleave_32f_x2(re, im, in, N);
48			*
49			* printf("          re  | im\n");
50			* for(unsigned int ii = 0; ii < N; ++ii){
51			* printf("out(%i) = %+.1f | %+.1f\n", ii, re[ii], im[ii]);
52			* }
53			*
54			* volk_free(in);
55			* volk_free(re);
56			* volk_free(im);
57			* \endcode
58			*/
59
60			#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
61			#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
62
63			#include <inttypes.h>
64			#include <stdio.h>
65
66			#ifdef LV_HAVE_AVX
67			#include <immintrin.h>
68		2	static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer,
69			float* qBuffer,
70			const lv_32fc_t* complexVector,
71			unsigned int num_points)
72			{
73		2	const float* complexVectorPtr = (float*)complexVector;
74		2	float* iBufferPtr = iBuffer;
75		2	float* qBufferPtr = qBuffer;
76
77		2	unsigned int number = 0;
78			// Mask for real and imaginary parts
79		2	const unsigned int eighthPoints = num_points / 8;
80			__m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
81	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
82		32766	cplxValue1 = _mm256_load_ps(complexVectorPtr);
83		32766	complexVectorPtr += 8;
84
85		32766	cplxValue2 = _mm256_load_ps(complexVectorPtr);
86		32766	complexVectorPtr += 8;
87
88		32766	complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
89		32766	complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
90
91			// Arrange in i1i2i3i4 format
92		32766	iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
93			// Arrange in q1q2q3q4 format
94		32766	qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
95
96			_mm256_store_ps(iBufferPtr, iValue);
97			_mm256_store_ps(qBufferPtr, qValue);
98
99		32766	iBufferPtr += 8;
100		32766	qBufferPtr += 8;
101			}
102
103		2	number = eighthPoints * 8;
104	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
105		14	*iBufferPtr++ = *complexVectorPtr++;
106		14	*qBufferPtr++ = *complexVectorPtr++;
107			}
108		2	}
109			#endif /* LV_HAVE_AVX */
110
111			#ifdef LV_HAVE_SSE
112			#include <xmmintrin.h>
113
114		2	static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer,
115			float* qBuffer,
116			const lv_32fc_t* complexVector,
117			unsigned int num_points)
118			{
119		2	const float* complexVectorPtr = (float*)complexVector;
120		2	float* iBufferPtr = iBuffer;
121		2	float* qBufferPtr = qBuffer;
122
123		2	unsigned int number = 0;
124		2	const unsigned int quarterPoints = num_points / 4;
125			__m128 cplxValue1, cplxValue2, iValue, qValue;
126	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
127		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
128		65534	complexVectorPtr += 4;
129
130		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
131		65534	complexVectorPtr += 4;
132
133			// Arrange in i1i2i3i4 format
134		65534	iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
135			// Arrange in q1q2q3q4 format
136		65534	qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
137
138			_mm_store_ps(iBufferPtr, iValue);
139			_mm_store_ps(qBufferPtr, qValue);
140
141		65534	iBufferPtr += 4;
142		65534	qBufferPtr += 4;
143			}
144
145		2	number = quarterPoints * 4;
146	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
147		6	*iBufferPtr++ = *complexVectorPtr++;
148		6	*qBufferPtr++ = *complexVectorPtr++;
149			}
150		2	}
151			#endif /* LV_HAVE_SSE */
152
153
154			#ifdef LV_HAVE_NEON
155			#include <arm_neon.h>
156
157			static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer,
158			float* qBuffer,
159			const lv_32fc_t* complexVector,
160			unsigned int num_points)
161			{
162			unsigned int number = 0;
163			unsigned int quarter_points = num_points / 4;
164			const float* complexVectorPtr = (float*)complexVector;
165			float* iBufferPtr = iBuffer;
166			float* qBufferPtr = qBuffer;
167			float32x4x2_t complexInput;
168
169			for (number = 0; number < quarter_points; number++) {
170			complexInput = vld2q_f32(complexVectorPtr);
171			vst1q_f32(iBufferPtr, complexInput.val[0]);
172			vst1q_f32(qBufferPtr, complexInput.val[1]);
173			complexVectorPtr += 8;
174			iBufferPtr += 4;
175			qBufferPtr += 4;
176			}
177
178			for (number = quarter_points * 4; number < num_points; number++) {
179			*iBufferPtr++ = *complexVectorPtr++;
180			*qBufferPtr++ = *complexVectorPtr++;
181			}
182			}
183			#endif /* LV_HAVE_NEON */
184
185
186			#ifdef LV_HAVE_GENERIC
187
188		2	static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer,
189			float* qBuffer,
190			const lv_32fc_t* complexVector,
191			unsigned int num_points)
192			{
193		2	const float* complexVectorPtr = (float*)complexVector;
194		2	float* iBufferPtr = iBuffer;
195		2	float* qBufferPtr = qBuffer;
196			unsigned int number;
197	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
198		262142	*iBufferPtr++ = *complexVectorPtr++;
199		262142	*qBufferPtr++ = *complexVectorPtr++;
200			}
201		2	}
202			#endif /* LV_HAVE_GENERIC */
203
204			#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
205
206
207			#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
208			#define INCLUDED_volk_32fc_deinterleave_32f_x2_u_H
209
210			#include <inttypes.h>
211			#include <stdio.h>
212
213			#ifdef LV_HAVE_AVX
214			#include <immintrin.h>
215		2	static inline void volk_32fc_deinterleave_32f_x2_u_avx(float* iBuffer,
216			float* qBuffer,
217			const lv_32fc_t* complexVector,
218			unsigned int num_points)
219			{
220		2	const float* complexVectorPtr = (float*)complexVector;
221		2	float* iBufferPtr = iBuffer;
222		2	float* qBufferPtr = qBuffer;
223
224		2	unsigned int number = 0;
225			// Mask for real and imaginary parts
226		2	const unsigned int eighthPoints = num_points / 8;
227			__m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
228	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
229		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
230		32766	complexVectorPtr += 8;
231
232		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
233		32766	complexVectorPtr += 8;
234
235		32766	complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
236		32766	complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
237
238			// Arrange in i1i2i3i4 format
239		32766	iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
240			// Arrange in q1q2q3q4 format
241		32766	qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
242
243			_mm256_storeu_ps(iBufferPtr, iValue);
244			_mm256_storeu_ps(qBufferPtr, qValue);
245
246		32766	iBufferPtr += 8;
247		32766	qBufferPtr += 8;
248			}
249
250		2	number = eighthPoints * 8;
251	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
252		14	*iBufferPtr++ = *complexVectorPtr++;
253		14	*qBufferPtr++ = *complexVectorPtr++;
254			}
255		2	}
256			#endif /* LV_HAVE_AVX */
257			#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_u_H */
258