GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32fc_deinterleave_real_64f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	60	60	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32fc_deinterleave_real_64f
    
       *
    
       * \b Overview
    
       *
    
       * Deinterleaves the complex floating point vector and return the real
    
       * part (inphase) of the samples that have been converted to doubles.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32fc_deinterleave_real_64f(double* iBuffer, const lv_32fc_t*
    
       * complexVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li complexVector: The complex input vector.
    
       * \li num_points: The number of complex data values to be deinterleaved.
    
       *
    
       * \b Outputs
    
       * \li iBuffer: The I buffer output data.
    
       *
    
       * \b Example
    
       * \code
    
       * Generate complex numbers around the top half of the unit circle and
    
       * extract all of the real parts to a double buffer.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   lv_32fc_t* in  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *   double* re = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       float real = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       float imag = std::sqrt(1.f - real * real);
    
       *       in[ii] = lv_cmake(real, imag);
    
       *   }
    
       *
    
       *   volk_32fc_deinterleave_real_64f(re, in, N);
    
       *
    
       *   printf("          real part\n");
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out(%i) = %+.1g\n", ii, re[ii]);
    
       *   }
    
       *
    
       *   volk_free(in);
    
       *   volk_free(re);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
    
      #define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          double* iBufferPtr = iBuffer;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m256 cplxValue;
    
          __m128 fVal;
    
          __m256d dVal;
    
      2
          __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue = _mm256_load_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 8;
    
              // Arrange in i1i2i1i2 format
    
      65534
              cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
    
      65534
              fVal = _mm256_extractf128_ps(cplxValue, 0);
    
      65534
              dVal = _mm256_cvtps_pd(fVal);
    
              _mm256_store_pd(iBufferPtr, dVal);
    
      65534
              iBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = (double)*complexVectorPtr++;
    
      6
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          double* iBufferPtr = iBuffer;
    
      2
          const unsigned int halfPoints = num_points / 2;
    
          __m128 cplxValue, fVal;
    
          __m128d dVal;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (; number < halfPoints; number++) {
    
      131070
              cplxValue = _mm_load_ps(complexVectorPtr);
    
      131070
              complexVectorPtr += 4;
    
              // Arrange in i1i2i1i2 format
    
      131070
              fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
    
      131070
              dVal = _mm_cvtps_pd(fVal);
    
              _mm_store_pd(iBufferPtr, dVal);
    
      131070
              iBufferPtr += 2;
    
          }
    
      2
          number = halfPoints * 2;
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.

      4
          for (; number < num_points; number++) {
    
      2
              *iBufferPtr++ = (double)*complexVectorPtr++;
    
      2
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer,
    
                                                                 const lv_32fc_t* complexVector,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          double* iBufferPtr = iBuffer;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *iBufferPtr++ = (double)*complexVectorPtr++;
    
      262142
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_NEONV8
    
      #include <arm_neon.h>
    
      static inline void volk_32fc_deinterleave_real_64f_neon(double* iBuffer,
    
                                                              const lv_32fc_t* complexVector,
    
                                                              unsigned int num_points)
    
      {
    
          unsigned int number = 0;
    
          unsigned int quarter_points = num_points / 4;
    
          const float* complexVectorPtr = (float*)complexVector;
    
          double* iBufferPtr = iBuffer;
    
          float32x2x4_t complexInput;
    
          float64x2_t iVal1;
    
          float64x2_t iVal2;
    
          float64x2x2_t iVal;
    
          for (number = 0; number < quarter_points; number++) {
    
              // Load data into register
    
              complexInput = vld4_f32(complexVectorPtr);
    
              // Perform single to double precision conversion
    
              iVal1 = vcvt_f64_f32(complexInput.val[0]);
    
              iVal2 = vcvt_f64_f32(complexInput.val[2]);
    
              iVal.val[0] = iVal1;
    
              iVal.val[1] = iVal2;
    
              // Store results into memory buffer
    
              vst2q_f64(iBufferPtr, iVal);
    
              // Update pointers
    
              iBufferPtr += 4;
    
              complexVectorPtr += 8;
    
          }
    
          for (number = quarter_points * 4; number < num_points; number++) {
    
              *iBufferPtr++ = (double)*complexVectorPtr++;
    
              complexVectorPtr++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
    
      #ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
    
      #define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer,
    
                                                                const lv_32fc_t* complexVector,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* complexVectorPtr = (float*)complexVector;
    
      2
          double* iBufferPtr = iBuffer;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m256 cplxValue;
    
          __m128 fVal;
    
          __m256d dVal;
    
      2
          __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              cplxValue = _mm256_loadu_ps(complexVectorPtr);
    
      65534
              complexVectorPtr += 8;
    
              // Arrange in i1i2i1i2 format
    
      65534
              cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
    
      65534
              fVal = _mm256_extractf128_ps(cplxValue, 0);
    
      65534
              dVal = _mm256_cvtps_pd(fVal);
    
              _mm256_storeu_pd(iBufferPtr, dVal);
    
      65534
              iBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *iBufferPtr++ = (double)*complexVectorPtr++;
    
      6
              complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32fc_deinterleave_real_64f
12			*
13			* \b Overview
14			*
15			* Deinterleaves the complex floating point vector and return the real
16			* part (inphase) of the samples that have been converted to doubles.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32fc_deinterleave_real_64f(double* iBuffer, const lv_32fc_t*
21			* complexVector, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li complexVector: The complex input vector.
25			* \li num_points: The number of complex data values to be deinterleaved.
26			*
27			* \b Outputs
28			* \li iBuffer: The I buffer output data.
29			*
30			* \b Example
31			* \code
32			* Generate complex numbers around the top half of the unit circle and
33			* extract all of the real parts to a double buffer.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* lv_32fc_t* in = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
38			* double* re = (double*)volk_malloc(sizeof(double)*N, alignment);
39			*
40			* for(unsigned int ii = 0; ii < N; ++ii){
41			* float real = 2.f * ((float)ii / (float)N) - 1.f;
42			* float imag = std::sqrt(1.f - real * real);
43			* in[ii] = lv_cmake(real, imag);
44			* }
45			*
46			* volk_32fc_deinterleave_real_64f(re, in, N);
47			*
48			* printf("          real part\n");
49			* for(unsigned int ii = 0; ii < N; ++ii){
50			* printf("out(%i) = %+.1g\n", ii, re[ii]);
51			* }
52			*
53			* volk_free(in);
54			* volk_free(re);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
59			#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
60
61			#include <inttypes.h>
62			#include <stdio.h>
63
64			#ifdef LV_HAVE_AVX2
65			#include <immintrin.h>
66
67		2	static inline void volk_32fc_deinterleave_real_64f_a_avx2(double* iBuffer,
68			const lv_32fc_t* complexVector,
69			unsigned int num_points)
70			{
71		2	unsigned int number = 0;
72
73		2	const float* complexVectorPtr = (float*)complexVector;
74		2	double* iBufferPtr = iBuffer;
75
76		2	const unsigned int quarterPoints = num_points / 4;
77			__m256 cplxValue;
78			__m128 fVal;
79			__m256d dVal;
80		2	__m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
81	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
82
83		65534	cplxValue = _mm256_load_ps(complexVectorPtr);
84		65534	complexVectorPtr += 8;
85
86			// Arrange in i1i2i1i2 format
87		65534	cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
88		65534	fVal = _mm256_extractf128_ps(cplxValue, 0);
89		65534	dVal = _mm256_cvtps_pd(fVal);
90			_mm256_store_pd(iBufferPtr, dVal);
91
92		65534	iBufferPtr += 4;
93			}
94
95		2	number = quarterPoints * 4;
96	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
97		6	*iBufferPtr++ = (double)*complexVectorPtr++;
98		6	complexVectorPtr++;
99			}
100		2	}
101			#endif /* LV_HAVE_AVX2 */
102
103			#ifdef LV_HAVE_SSE2
104			#include <emmintrin.h>
105
106		2	static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer,
107			const lv_32fc_t* complexVector,
108			unsigned int num_points)
109			{
110		2	unsigned int number = 0;
111
112		2	const float* complexVectorPtr = (float*)complexVector;
113		2	double* iBufferPtr = iBuffer;
114
115		2	const unsigned int halfPoints = num_points / 2;
116			__m128 cplxValue, fVal;
117			__m128d dVal;
118	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (; number < halfPoints; number++) {
119
120		131070	cplxValue = _mm_load_ps(complexVectorPtr);
121		131070	complexVectorPtr += 4;
122
123			// Arrange in i1i2i1i2 format
124		131070	fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2, 0, 2, 0));
125		131070	dVal = _mm_cvtps_pd(fVal);
126			_mm_store_pd(iBufferPtr, dVal);
127
128		131070	iBufferPtr += 2;
129			}
130
131		2	number = halfPoints * 2;
132	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times.	4	for (; number < num_points; number++) {
133		2	*iBufferPtr++ = (double)*complexVectorPtr++;
134		2	complexVectorPtr++;
135			}
136		2	}
137			#endif /* LV_HAVE_SSE */
138
139			#ifdef LV_HAVE_GENERIC
140
141		2	static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer,
142			const lv_32fc_t* complexVector,
143			unsigned int num_points)
144			{
145		2	unsigned int number = 0;
146		2	const float* complexVectorPtr = (float*)complexVector;
147		2	double* iBufferPtr = iBuffer;
148	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
149		262142	*iBufferPtr++ = (double)*complexVectorPtr++;
150		262142	complexVectorPtr++;
151			}
152		2	}
153			#endif /* LV_HAVE_GENERIC */
154
155			#ifdef LV_HAVE_NEONV8
156			#include <arm_neon.h>
157
158			static inline void volk_32fc_deinterleave_real_64f_neon(double* iBuffer,
159			const lv_32fc_t* complexVector,
160			unsigned int num_points)
161			{
162			unsigned int number = 0;
163			unsigned int quarter_points = num_points / 4;
164			const float* complexVectorPtr = (float*)complexVector;
165			double* iBufferPtr = iBuffer;
166			float32x2x4_t complexInput;
167			float64x2_t iVal1;
168			float64x2_t iVal2;
169			float64x2x2_t iVal;
170
171			for (number = 0; number < quarter_points; number++) {
172			// Load data into register
173			complexInput = vld4_f32(complexVectorPtr);
174
175			// Perform single to double precision conversion
176			iVal1 = vcvt_f64_f32(complexInput.val[0]);
177			iVal2 = vcvt_f64_f32(complexInput.val[2]);
178			iVal.val[0] = iVal1;
179			iVal.val[1] = iVal2;
180
181			// Store results into memory buffer
182			vst2q_f64(iBufferPtr, iVal);
183
184			// Update pointers
185			iBufferPtr += 4;
186			complexVectorPtr += 8;
187			}
188
189			for (number = quarter_points * 4; number < num_points; number++) {
190			*iBufferPtr++ = (double)*complexVectorPtr++;
191			complexVectorPtr++;
192			}
193			}
194			#endif /* LV_HAVE_NEON */
195
196			#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
197
198			#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_u_H
199			#define INCLUDED_volk_32fc_deinterleave_real_64f_u_H
200
201			#include <inttypes.h>
202			#include <stdio.h>
203
204			#ifdef LV_HAVE_AVX2
205			#include <immintrin.h>
206
207		2	static inline void volk_32fc_deinterleave_real_64f_u_avx2(double* iBuffer,
208			const lv_32fc_t* complexVector,
209			unsigned int num_points)
210			{
211		2	unsigned int number = 0;
212
213		2	const float* complexVectorPtr = (float*)complexVector;
214		2	double* iBufferPtr = iBuffer;
215
216		2	const unsigned int quarterPoints = num_points / 4;
217			__m256 cplxValue;
218			__m128 fVal;
219			__m256d dVal;
220		2	__m256i idx = _mm256_set_epi32(0, 0, 0, 0, 6, 4, 2, 0);
221	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
222
223		65534	cplxValue = _mm256_loadu_ps(complexVectorPtr);
224		65534	complexVectorPtr += 8;
225
226			// Arrange in i1i2i1i2 format
227		65534	cplxValue = _mm256_permutevar8x32_ps(cplxValue, idx);
228		65534	fVal = _mm256_extractf128_ps(cplxValue, 0);
229		65534	dVal = _mm256_cvtps_pd(fVal);
230			_mm256_storeu_pd(iBufferPtr, dVal);
231
232		65534	iBufferPtr += 4;
233			}
234
235		2	number = quarterPoints * 4;
236	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
237		6	*iBufferPtr++ = (double)*complexVectorPtr++;
238		6	complexVectorPtr++;
239			}
240		2	}
241			#endif /* LV_HAVE_AVX2 */
242
243			#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_u_H */
244