GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_x2_interleave_32fc.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	72	72	100.0%
Functions:	4	4	100.0%
Branches:	14	14	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_x2_interleave_32fc
    
       *
    
       * \b Overview
    
       *
    
       * Takes input vector iBuffer as the real (inphase) part and input
    
       * vector qBuffer as the imag (quadrature) part and combines them into
    
       * a complex output vector.
    
       *
    
       * c[i] = complex(a[i], b[i])
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_x2_interleave_32fc(lv_32fc_t* complexVector, const float* iBuffer, const
    
       * float* qBuffer, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li iBuffer: Input vector of samples for the real part.
    
       * \li qBuffer: Input vector of samples for the imaginary part.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li complexVector: The output vector of complex numbers.
    
       *
    
       * \b Example
    
       * Generate the top half of the unit circle with real points equally spaced.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
    
       *   }
    
       *
    
       *   volk_32f_x2_interleave_32fc(out, real, imag, N);
    
       *
    
       *  for(unsigned int ii = 0; ii < N; ++ii){
    
       *      printf("out[%u] = %1.2f + %1.2fj\n", ii, std::real(out[ii]), std::imag(out[ii]));
    
       *  }
    
       *
    
       *   volk_free(imag);
    
       *   volk_free(real);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
    
      #define INCLUDED_volk_32f_x2_interleave_32fc_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
    
                                                           const float* iBuffer,
    
                                                           const float* qBuffer,
    
                                                           unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          float* complexVectorPtr = (float*)complexVector;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          const uint64_t eighthPoints = num_points / 8;
    
          __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              iValue = _mm256_load_ps(iBufferPtr);
    
      32766
              qValue = _mm256_load_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      32766
              cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      32766
              cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
    
      32766
              cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
    
              _mm256_store_ps(complexVectorPtr, cplxValue);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
    
              _mm256_store_ps(complexVectorPtr, cplxValue);
    
      32766
              complexVectorPtr += 8;
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *complexVectorPtr++ = *iBufferPtr++;
    
      14
              *complexVectorPtr++ = *qBufferPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
    
                                                           const float* iBuffer,
    
                                                           const float* qBuffer,
    
                                                           unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          float* complexVectorPtr = (float*)complexVector;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          const uint64_t quarterPoints = num_points / 4;
    
          __m128 iValue, qValue, cplxValue;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              iValue = _mm_load_ps(iBufferPtr);
    
      65534
              qValue = _mm_load_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      65534
              cplxValue = _mm_unpacklo_ps(iValue, qValue);
    
              _mm_store_ps(complexVectorPtr, cplxValue);
    
      65534
              complexVectorPtr += 4;
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      65534
              cplxValue = _mm_unpackhi_ps(iValue, qValue);
    
              _mm_store_ps(complexVectorPtr, cplxValue);
    
      65534
              complexVectorPtr += 4;
    
      65534
              iBufferPtr += 4;
    
      65534
              qBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *complexVectorPtr++ = *iBufferPtr++;
    
      6
              *complexVectorPtr++ = *qBufferPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
    
                                                          const float* iBuffer,
    
                                                          const float* qBuffer,
    
                                                          unsigned int num_points)
    
      {
    
          unsigned int quarter_points = num_points / 4;
    
          unsigned int number;
    
          float* complexVectorPtr = (float*)complexVector;
    
          float32x4x2_t complex_vec;
    
          for (number = 0; number < quarter_points; ++number) {
    
              complex_vec.val[0] = vld1q_f32(iBuffer);
    
              complex_vec.val[1] = vld1q_f32(qBuffer);
    
              vst2q_f32(complexVectorPtr, complex_vec);
    
              iBuffer += 4;
    
              qBuffer += 4;
    
              complexVectorPtr += 8;
    
          }
    
          for (number = quarter_points * 4; number < num_points; ++number) {
    
              *complexVectorPtr++ = *iBuffer++;
    
              *complexVectorPtr++ = *qBuffer++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
    
                                                             const float* iBuffer,
    
                                                             const float* qBuffer,
    
                                                             unsigned int num_points)
    
      {
    
      2
          float* complexVectorPtr = (float*)complexVector;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
          unsigned int number;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *complexVectorPtr++ = *iBufferPtr++;
    
      262142
              *complexVectorPtr++ = *qBufferPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
    
      #ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
    
      #define INCLUDED_volk_32f_x2_interleave_32fc_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
    
                                                           const float* iBuffer,
    
                                                           const float* qBuffer,
    
                                                           unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          float* complexVectorPtr = (float*)complexVector;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          const uint64_t eighthPoints = num_points / 8;
    
          __m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              iValue = _mm256_loadu_ps(iBufferPtr);
    
      32766
              qValue = _mm256_loadu_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      32766
              cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      32766
              cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
    
      32766
              cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
    
              _mm256_storeu_ps(complexVectorPtr, cplxValue);
    
      32766
              complexVectorPtr += 8;
    
      32766
              cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
    
              _mm256_storeu_ps(complexVectorPtr, cplxValue);
    
      32766
              complexVectorPtr += 8;
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *complexVectorPtr++ = *iBufferPtr++;
    
      14
              *complexVectorPtr++ = *qBufferPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_x2_interleave_32fc
12			*
13			* \b Overview
14			*
15			* Takes input vector iBuffer as the real (inphase) part and input
16			* vector qBuffer as the imag (quadrature) part and combines them into
17			* a complex output vector.
18			*
19			* c[i] = complex(a[i], b[i])
20			*
21			* <b>Dispatcher Prototype</b>
22			* \code
23			* void volk_32f_x2_interleave_32fc(lv_32fc_t* complexVector, const float* iBuffer, const
24			* float* qBuffer, unsigned int num_points) \endcode
25			*
26			* \b Inputs
27			* \li iBuffer: Input vector of samples for the real part.
28			* \li qBuffer: Input vector of samples for the imaginary part.
29			* \li num_points: The number of values in both input vectors.
30			*
31			* \b Outputs
32			* \li complexVector: The output vector of complex numbers.
33			*
34			* \b Example
35			* Generate the top half of the unit circle with real points equally spaced.
36			* \code
37			* int N = 10;
38			* unsigned int alignment = volk_get_alignment();
39			* float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
40			* float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
41			* lv_32fc_t* out = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
42			*
43			* for(unsigned int ii = 0; ii < N; ++ii){
44			* real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
45			* imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
46			* }
47			*
48			* volk_32f_x2_interleave_32fc(out, real, imag, N);
49			*
50			* for(unsigned int ii = 0; ii < N; ++ii){
51			* printf("out[%u] = %1.2f + %1.2fj\n", ii, std::real(out[ii]), std::imag(out[ii]));
52			* }
53			*
54			* volk_free(imag);
55			* volk_free(real);
56			* volk_free(out);
57			* \endcode
58			*/
59
60			#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
61			#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
62
63			#include <inttypes.h>
64			#include <stdio.h>
65
66			#ifdef LV_HAVE_AVX
67			#include <immintrin.h>
68
69		2	static inline void volk_32f_x2_interleave_32fc_a_avx(lv_32fc_t* complexVector,
70			const float* iBuffer,
71			const float* qBuffer,
72			unsigned int num_points)
73			{
74		2	unsigned int number = 0;
75		2	float* complexVectorPtr = (float*)complexVector;
76		2	const float* iBufferPtr = iBuffer;
77		2	const float* qBufferPtr = qBuffer;
78
79		2	const uint64_t eighthPoints = num_points / 8;
80
81			__m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
82	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
83		32766	iValue = _mm256_load_ps(iBufferPtr);
84		32766	qValue = _mm256_load_ps(qBufferPtr);
85
86			// Interleaves the lower two values in the i and q variables into one buffer
87		32766	cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
88			// Interleaves the upper two values in the i and q variables into one buffer
89		32766	cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
90
91		32766	cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
92			_mm256_store_ps(complexVectorPtr, cplxValue);
93		32766	complexVectorPtr += 8;
94
95		32766	cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
96			_mm256_store_ps(complexVectorPtr, cplxValue);
97		32766	complexVectorPtr += 8;
98
99		32766	iBufferPtr += 8;
100		32766	qBufferPtr += 8;
101			}
102
103		2	number = eighthPoints * 8;
104	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
105		14	*complexVectorPtr++ = *iBufferPtr++;
106		14	*complexVectorPtr++ = *qBufferPtr++;
107			}
108		2	}
109
110			#endif /* LV_HAVE_AVX */
111
112			#ifdef LV_HAVE_SSE
113			#include <xmmintrin.h>
114
115		2	static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector,
116			const float* iBuffer,
117			const float* qBuffer,
118			unsigned int num_points)
119			{
120		2	unsigned int number = 0;
121		2	float* complexVectorPtr = (float*)complexVector;
122		2	const float* iBufferPtr = iBuffer;
123		2	const float* qBufferPtr = qBuffer;
124
125		2	const uint64_t quarterPoints = num_points / 4;
126
127			__m128 iValue, qValue, cplxValue;
128	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
129		65534	iValue = _mm_load_ps(iBufferPtr);
130		65534	qValue = _mm_load_ps(qBufferPtr);
131
132			// Interleaves the lower two values in the i and q variables into one buffer
133		65534	cplxValue = _mm_unpacklo_ps(iValue, qValue);
134			_mm_store_ps(complexVectorPtr, cplxValue);
135		65534	complexVectorPtr += 4;
136
137			// Interleaves the upper two values in the i and q variables into one buffer
138		65534	cplxValue = _mm_unpackhi_ps(iValue, qValue);
139			_mm_store_ps(complexVectorPtr, cplxValue);
140		65534	complexVectorPtr += 4;
141
142		65534	iBufferPtr += 4;
143		65534	qBufferPtr += 4;
144			}
145
146		2	number = quarterPoints * 4;
147	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
148		6	*complexVectorPtr++ = *iBufferPtr++;
149		6	*complexVectorPtr++ = *qBufferPtr++;
150			}
151		2	}
152			#endif /* LV_HAVE_SSE */
153
154
155			#ifdef LV_HAVE_NEON
156			#include <arm_neon.h>
157
158			static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector,
159			const float* iBuffer,
160			const float* qBuffer,
161			unsigned int num_points)
162			{
163			unsigned int quarter_points = num_points / 4;
164			unsigned int number;
165			float* complexVectorPtr = (float*)complexVector;
166
167			float32x4x2_t complex_vec;
168			for (number = 0; number < quarter_points; ++number) {
169			complex_vec.val[0] = vld1q_f32(iBuffer);
170			complex_vec.val[1] = vld1q_f32(qBuffer);
171			vst2q_f32(complexVectorPtr, complex_vec);
172			iBuffer += 4;
173			qBuffer += 4;
174			complexVectorPtr += 8;
175			}
176
177			for (number = quarter_points * 4; number < num_points; ++number) {
178			*complexVectorPtr++ = *iBuffer++;
179			*complexVectorPtr++ = *qBuffer++;
180			}
181			}
182			#endif /* LV_HAVE_NEON */
183
184
185			#ifdef LV_HAVE_GENERIC
186
187		2	static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* complexVector,
188			const float* iBuffer,
189			const float* qBuffer,
190			unsigned int num_points)
191			{
192		2	float* complexVectorPtr = (float*)complexVector;
193		2	const float* iBufferPtr = iBuffer;
194		2	const float* qBufferPtr = qBuffer;
195			unsigned int number;
196
197	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
198		262142	*complexVectorPtr++ = *iBufferPtr++;
199		262142	*complexVectorPtr++ = *qBufferPtr++;
200			}
201		2	}
202			#endif /* LV_HAVE_GENERIC */
203
204
205			#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
206
207			#ifndef INCLUDED_volk_32f_x2_interleave_32fc_u_H
208			#define INCLUDED_volk_32f_x2_interleave_32fc_u_H
209
210			#include <inttypes.h>
211			#include <stdio.h>
212
213			#ifdef LV_HAVE_AVX
214			#include <immintrin.h>
215
216		2	static inline void volk_32f_x2_interleave_32fc_u_avx(lv_32fc_t* complexVector,
217			const float* iBuffer,
218			const float* qBuffer,
219			unsigned int num_points)
220			{
221		2	unsigned int number = 0;
222		2	float* complexVectorPtr = (float*)complexVector;
223		2	const float* iBufferPtr = iBuffer;
224		2	const float* qBufferPtr = qBuffer;
225
226		2	const uint64_t eighthPoints = num_points / 8;
227
228			__m256 iValue, qValue, cplxValue1, cplxValue2, cplxValue;
229	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
230		32766	iValue = _mm256_loadu_ps(iBufferPtr);
231		32766	qValue = _mm256_loadu_ps(qBufferPtr);
232
233			// Interleaves the lower two values in the i and q variables into one buffer
234		32766	cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
235			// Interleaves the upper two values in the i and q variables into one buffer
236		32766	cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
237
238		32766	cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
239			_mm256_storeu_ps(complexVectorPtr, cplxValue);
240		32766	complexVectorPtr += 8;
241
242		32766	cplxValue = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
243			_mm256_storeu_ps(complexVectorPtr, cplxValue);
244		32766	complexVectorPtr += 8;
245
246		32766	iBufferPtr += 8;
247		32766	qBufferPtr += 8;
248			}
249
250		2	number = eighthPoints * 8;
251	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
252		14	*complexVectorPtr++ = *iBufferPtr++;
253		14	*complexVectorPtr++ = *qBufferPtr++;
254			}
255		2	}
256			#endif /* LV_HAVE_AVX */
257
258			#endif /* INCLUDED_volk_32f_x2_interleave_32fc_u_H */
259