GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_deinterleave_real_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	63	63	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_deinterleave_real_32f
    
       *
    
       * \b Overview
    
       *
    
       * Deinterleaves the complex floating point vector and return the real
    
       * part (inphase) of the samples.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_deinterleave_real_32f(float* iBuffer, const lv_32fc_t* complexVector,
    
       * unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of complex data values to be deinterleaved.
    
       *
    
       * \b Outputs
    
       * \li iBuffer: The I buffer output data.
    
       *
    
       * \b Example
    
       * Generate complex numbers around the top half of the unit circle and
    
       * extract all of the real parts to a float buffer.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       float real = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *   }
    
       *
    
       *   volk_32fc_deinterleave_real_32f(re, in, N);
    
       *
    
       *   printf("          real part\n");
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out(%i) = %+.1f\n", ii, re[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(re);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
    
      #define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
          __m256 cplxValue1, cplxValue2;
    
          __m256 iValue;
    
      2
          __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_load_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
      32766
              iValue = _mm256_permutevar8x32_ps(iValue, idx);
    
              _mm256_store_ps(iBufferPtr, iValue);
    
      32766
              iBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = *complexVectorPtr++;
    
      14
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
    
                                                               const lv_32fc_t* complexVector,
    
                                                               unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
          __m128 cplxValue1, cplxValue2, iValue;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue1 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
      65534
              cplxValue2 = _mm_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 4;
    
              // Arrange in i1i2i3i4 format
    
      65534
              iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
              _mm_store_ps(iBufferPtr, iValue);
    
      65534
              iBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = *complexVectorPtr++;
    
      6
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
    
                                                                 const lv_32fc_t* complexVector,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *iBufferPtr++ = *complexVectorPtr++;
    
      262142
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
    
                                                              const lv_32fc_t* complexVector,
    
                                                              unsigned int num_points)
    
      {
    
          unsigned int number = 0;
    
          unsigned int quarter_points = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          float* iBufferPtr = iBuffer;
    
          float32x4x2_t complexInput;
    
          for (number = 0; number < quarter_points; number++) {
    
              complexInput = vld2q_f32(complexVectorPtr);
    
              vst1q_f32(iBufferPtr, complexInput.val[0]);
    
              complexVectorPtr += 8;
    
              iBufferPtr += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              *iBufferPtr++ = *complexVectorPtr++;
    
              complexVectorPtr++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
    
      #define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* complexVectorPtr = (const float*)complexVector;
    
      2
          float* iBufferPtr = iBuffer;
    
          __m256 cplxValue1, cplxValue2;
    
          __m256 iValue;
    
      2
          __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
              // Arrange in i1i2i3i4 format
    
      32766
              iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
    
      32766
              iValue = _mm256_permutevar8x32_ps(iValue, idx);
    
              _mm256_storeu_ps(iBufferPtr, iValue);
    
      32766
              iBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *iBufferPtr++ = *complexVectorPtr++;
    
      14
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_deinterleave_real_32f
12			*
13			* \b Overview
14			*
15			* Deinterleaves the complex floating point vector and return the real
16			* part (inphase) of the samples.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_deinterleave_real_32f(float* iBuffer, const lv_32fc_t* complexVector,
21			* unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li num_points: The number of complex data values to be deinterleaved.
26			*
27			* \b Outputs
28			* \li iBuffer: The I buffer output data.
29			*
30			* \b Example
31			* Generate complex numbers around the top half of the unit circle and
32			* extract all of the real parts to a float buffer.
33			* \code
34			* int N = 10;
35			* unsigned int alignment = volk_get_alignment();
36			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37			* float* re = (float*)volk_malloc(sizeof(float)*N, alignment);
38			*
39			* for(unsigned int ii = 0; ii < N; ++ii){
40			* float real = 2.f * ((float)ii / (float)N) - 1.f;
41			* float imag = std::sqrt(1.f - real * real);
42			* in[ii] = lv_cmake(real, imag);
43			* }
44			*
45			* volk_32fc_deinterleave_real_32f(re, in, N);
46			*
47			* printf("          real part\n");
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out(%i) = %+.1f\n", ii, re[ii]);
50			* }
51			*
52			* volk_free(in);
53			* volk_free(re);
54			* \endcode
55			*/
56
57			#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
58			#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
59
60			#include <inttypes.h>
61			#include <stdio.h>
62
63			#ifdef LV_HAVE_AVX2
64			#include <immintrin.h>
65
66		2	static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
67			const lv_32fc_t* complexVector,
68			unsigned int num_points)
69			{
70		2	unsigned int number = 0;
71		2	const unsigned int eighthPoints = num_points / 8;
72
73		2	const float* complexVectorPtr = (const float*)complexVector;
74		2	float* iBufferPtr = iBuffer;
75
76			__m256 cplxValue1, cplxValue2;
77			__m256 iValue;
78		2	__m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
79	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
80
81		32766	cplxValue1 = _mm256_load_ps(complexVectorPtr);
82		32766	complexVectorPtr += 8;
83
84		32766	cplxValue2 = _mm256_load_ps(complexVectorPtr);
85		32766	complexVectorPtr += 8;
86
87			// Arrange in i1i2i3i4 format
88		32766	iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
89		32766	iValue = _mm256_permutevar8x32_ps(iValue, idx);
90
91			_mm256_store_ps(iBufferPtr, iValue);
92
93		32766	iBufferPtr += 8;
94			}
95
96		2	number = eighthPoints * 8;
97	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
98		14	*iBufferPtr++ = *complexVectorPtr++;
99		14	complexVectorPtr++;
100			}
101		2	}
102			#endif /* LV_HAVE_AVX2 */
103
104			#ifdef LV_HAVE_SSE
105			#include <xmmintrin.h>
106
107		2	static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
108			const lv_32fc_t* complexVector,
109			unsigned int num_points)
110			{
111		2	unsigned int number = 0;
112		2	const unsigned int quarterPoints = num_points / 4;
113
114		2	const float* complexVectorPtr = (const float*)complexVector;
115		2	float* iBufferPtr = iBuffer;
116
117			__m128 cplxValue1, cplxValue2, iValue;
118	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
119
120		65534	cplxValue1 = _mm_load_ps(complexVectorPtr);
121		65534	complexVectorPtr += 4;
122
123		65534	cplxValue2 = _mm_load_ps(complexVectorPtr);
124		65534	complexVectorPtr += 4;
125
126			// Arrange in i1i2i3i4 format
127		65534	iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
128
129			_mm_store_ps(iBufferPtr, iValue);
130
131		65534	iBufferPtr += 4;
132			}
133
134		2	number = quarterPoints * 4;
135	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
136		6	*iBufferPtr++ = *complexVectorPtr++;
137		6	complexVectorPtr++;
138			}
139		2	}
140			#endif /* LV_HAVE_SSE */
141
142
143			#ifdef LV_HAVE_GENERIC
144
145		2	static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
146			const lv_32fc_t* complexVector,
147			unsigned int num_points)
148			{
149		2	unsigned int number = 0;
150		2	const float* complexVectorPtr = (float*)complexVector;
151		2	float* iBufferPtr = iBuffer;
152	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
153		262142	*iBufferPtr++ = *complexVectorPtr++;
154		262142	complexVectorPtr++;
155			}
156		2	}
157			#endif /* LV_HAVE_GENERIC */
158
159
160			#ifdef LV_HAVE_NEON
161			#include <arm_neon.h>
162
163			static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
164			const lv_32fc_t* complexVector,
165			unsigned int num_points)
166			{
167			unsigned int number = 0;
168			unsigned int quarter_points = num_points / 4;
169			const float* complexVectorPtr = (float*)complexVector;
170			float* iBufferPtr = iBuffer;
171			float32x4x2_t complexInput;
172
173			for (number = 0; number < quarter_points; number++) {
174			complexInput = vld2q_f32(complexVectorPtr);
175			vst1q_f32(iBufferPtr, complexInput.val[0]);
176			complexVectorPtr += 8;
177			iBufferPtr += 4;
178			}
179
180			for (number = quarter_points * 4; number < num_points; number++) {
181			*iBufferPtr++ = *complexVectorPtr++;
182			complexVectorPtr++;
183			}
184			}
185			#endif /* LV_HAVE_NEON */
186
187			#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
188
189
190			#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
191			#define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
192
193			#include <inttypes.h>
194			#include <stdio.h>
195
196			#ifdef LV_HAVE_AVX2
197			#include <immintrin.h>
198
199		2	static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
200			const lv_32fc_t* complexVector,
201			unsigned int num_points)
202			{
203		2	unsigned int number = 0;
204		2	const unsigned int eighthPoints = num_points / 8;
205
206		2	const float* complexVectorPtr = (const float*)complexVector;
207		2	float* iBufferPtr = iBuffer;
208
209			__m256 cplxValue1, cplxValue2;
210			__m256 iValue;
211		2	__m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
212	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
213
214		32766	cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
215		32766	complexVectorPtr += 8;
216
217		32766	cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
218		32766	complexVectorPtr += 8;
219
220			// Arrange in i1i2i3i4 format
221		32766	iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
222		32766	iValue = _mm256_permutevar8x32_ps(iValue, idx);
223
224			_mm256_storeu_ps(iBufferPtr, iValue);
225
226		32766	iBufferPtr += 8;
227			}
228
229		2	number = eighthPoints * 8;
230	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
231		14	*iBufferPtr++ = *complexVectorPtr++;
232		14	complexVectorPtr++;
233			}
234		2	}
235			#endif /* LV_HAVE_AVX2 */
236
237			#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */
238