GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_x2_s32f_interleave_16ic.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	117	117	100.0%
Functions:	5	5	100.0%
Branches:	18	18	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_x2_s32f_interleave_16ic
    
       *
    
       * \b Overview
    
       *
    
       * Takes input vector iBuffer as the real (inphase) part and input
    
       * vector qBuffer as the imag (quadrature) part and combines them into
    
       * a complex output vector. The output is scaled by the input scalar
    
       * value and converted to a 16-bit short complex number.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_x2_s32f_interleave_16ic(lv_16sc_t* complexVector, const float* iBuffer,
    
       * const float* qBuffer, const float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li iBuffer: Input vector of samples for the real part.
    
       * \li qBuffer: Input vector of samples for the imaginary part.
    
       * \li scalar:  The scalar value used to scale the values before converting to shorts.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li complexVector: The output vector of complex numbers.
    
       *
    
       * \b Example
    
       * Generate points around the unit circle and convert to complex integers.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   lv_16sc_t* out = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
    
       *       imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
    
       *   }
    
       *   // Normalize by smallest delta (0.02 in this example)
    
       *   float scale = 50.f;
    
       *
    
       *   volk_32f_x2_s32f_interleave_16ic(out, real, imag, scale, N);
    
       *
    
       *  for(unsigned int ii = 0; ii < N; ++ii){
    
       *      printf("out[%u] = %i + %ij\n", ii, std::real(out[ii]), std::imag(out[ii]));
    
       *  }
    
       *
    
       *   volk_free(imag);
    
       *   volk_free(real);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
    
      #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(lv_16sc_t* complexVector,
    
                                                                 const float* iBuffer,
    
                                                                 const float* qBuffer,
    
                                                                 const float scalar,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
          __m256 iValue, qValue, cplxValue1, cplxValue2;
    
          __m256i intValue1, intValue2;
    
      2
          int16_t* complexVectorPtr = (int16_t*)complexVector;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              iValue = _mm256_load_ps(iBufferPtr);
    
      32766
              qValue = _mm256_load_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      32766
              cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      32766
              cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
    
      32766
              intValue1 = _mm256_cvtps_epi32(cplxValue1);
    
      32766
              intValue2 = _mm256_cvtps_epi32(cplxValue2);
    
      32766
              intValue1 = _mm256_packs_epi32(intValue1, intValue2);
    
              _mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
    
      32766
              complexVectorPtr += 16;
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          complexVectorPtr = (int16_t*)(&complexVector[number]);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
    
      14
              *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector,
    
                                                                 const float* iBuffer,
    
                                                                 const float* qBuffer,
    
                                                                 const float scalar,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 iValue, qValue, cplxValue1, cplxValue2;
    
          __m128i intValue1, intValue2;
    
      2
          int16_t* complexVectorPtr = (int16_t*)complexVector;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              iValue = _mm_load_ps(iBufferPtr);
    
      65534
              qValue = _mm_load_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      65534
              cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
    
      65534
              cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      65534
              cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
    
      65534
              cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
    
      65534
              intValue1 = _mm_cvtps_epi32(cplxValue1);
    
      65534
              intValue2 = _mm_cvtps_epi32(cplxValue2);
    
      65534
              intValue1 = _mm_packs_epi32(intValue1, intValue2);
    
              _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
    
      65534
              complexVectorPtr += 8;
    
      65534
              iBufferPtr += 4;
    
      65534
              qBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          complexVectorPtr = (int16_t*)(&complexVector[number]);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
    
      6
              *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector,
    
                                                                const float* iBuffer,
    
                                                                const float* qBuffer,
    
                                                                const float scalar,
    
                                                                unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          __m128 vScalar = _mm_set_ps1(scalar);
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
          __m128 iValue, qValue, cplxValue;
    
      2
          int16_t* complexVectorPtr = (int16_t*)complexVector;
    
          __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              iValue = _mm_load_ps(iBufferPtr);
    
      65534
              qValue = _mm_load_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      65534
              cplxValue = _mm_unpacklo_ps(iValue, qValue);
    
      65534
              cplxValue = _mm_mul_ps(cplxValue, vScalar);
    
              _mm_store_ps(floatBuffer, cplxValue);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      65534
              cplxValue = _mm_unpackhi_ps(iValue, qValue);
    
      65534
              cplxValue = _mm_mul_ps(cplxValue, vScalar);
    
              _mm_store_ps(floatBuffer, cplxValue);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
    
      65534
              *complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
    
      65534
              iBufferPtr += 4;
    
      65534
              qBufferPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
      2
          complexVectorPtr = (int16_t*)(&complexVector[number]);
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
    
      6
              *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector,
    
                                                                  const float* iBuffer,
    
                                                                  const float* qBuffer,
    
                                                                  const float scalar,
    
                                                                  unsigned int num_points)
    
      {
    
      2
          int16_t* complexVectorPtr = (int16_t*)complexVector;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
    
      262142
              *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
    
      #ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
    
      #define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVector,
    
                                                                 const float* iBuffer,
    
                                                                 const float* qBuffer,
    
                                                                 const float scalar,
    
                                                                 unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* iBufferPtr = iBuffer;
    
      2
          const float* qBufferPtr = qBuffer;
    
      2
          __m256 vScalar = _mm256_set1_ps(scalar);
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
          __m256 iValue, qValue, cplxValue1, cplxValue2;
    
          __m256i intValue1, intValue2;
    
      2
          int16_t* complexVectorPtr = (int16_t*)complexVector;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              iValue = _mm256_loadu_ps(iBufferPtr);
    
      32766
              qValue = _mm256_loadu_ps(qBufferPtr);
    
              // Interleaves the lower two values in the i and q variables into one buffer
    
      32766
              cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
    
      32766
              cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
    
              // Interleaves the upper two values in the i and q variables into one buffer
    
      32766
              cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
    
      32766
              cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
    
      32766
              intValue1 = _mm256_cvtps_epi32(cplxValue1);
    
      32766
              intValue2 = _mm256_cvtps_epi32(cplxValue2);
    
      32766
              intValue1 = _mm256_packs_epi32(intValue1, intValue2);
    
              _mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
    
      32766
              complexVectorPtr += 16;
    
      32766
              iBufferPtr += 8;
    
      32766
              qBufferPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
      2
          complexVectorPtr = (int16_t*)(&complexVector[number]);
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
    
      14
              *complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_x2_s32f_interleave_16ic
12			*
13			* \b Overview
14			*
15			* Takes input vector iBuffer as the real (inphase) part and input
16			* vector qBuffer as the imag (quadrature) part and combines them into
17			* a complex output vector. The output is scaled by the input scalar
18			* value and converted to a 16-bit short complex number.
19			*
20			* <b>Dispatcher Prototype</b>
21			* \code
22			* void volk_32f_x2_s32f_interleave_16ic(lv_16sc_t* complexVector, const float* iBuffer,
23			* const float* qBuffer, const float scalar, unsigned int num_points) \endcode
24			*
25			* \b Inputs
26			* \li iBuffer: Input vector of samples for the real part.
27			* \li qBuffer: Input vector of samples for the imaginary part.
28			* \li scalar: The scalar value used to scale the values before converting to shorts.
29			* \li num_points: The number of values in both input vectors.
30			*
31			* \b Outputs
32			* \li complexVector: The output vector of complex numbers.
33			*
34			* \b Example
35			* Generate points around the unit circle and convert to complex integers.
36			* \code
37			* int N = 10;
38			* unsigned int alignment = volk_get_alignment();
39			* float* imag = (float*)volk_malloc(sizeof(float)*N, alignment);
40			* float* real = (float*)volk_malloc(sizeof(float)*N, alignment);
41			* lv_16sc_t* out = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
42			*
43			* for(unsigned int ii = 0; ii < N; ++ii){
44			* real[ii] = 2.f * ((float)ii / (float)N) - 1.f;
45			* imag[ii] = std::sqrt(1.f - real[ii] * real[ii]);
46			* }
47			* // Normalize by smallest delta (0.02 in this example)
48			* float scale = 50.f;
49			*
50			* volk_32f_x2_s32f_interleave_16ic(out, real, imag, scale, N);
51			*
52			* for(unsigned int ii = 0; ii < N; ++ii){
53			* printf("out[%u] = %i + %ij\n", ii, std::real(out[ii]), std::imag(out[ii]));
54			* }
55			*
56			* volk_free(imag);
57			* volk_free(real);
58			* volk_free(out);
59			* \endcode
60			*/
61
62			#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
63			#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
64
65			#include <inttypes.h>
66			#include <stdio.h>
67			#include <volk/volk_common.h>
68
69			#ifdef LV_HAVE_AVX2
70			#include <immintrin.h>
71
72		2	static inline void volk_32f_x2_s32f_interleave_16ic_a_avx2(lv_16sc_t* complexVector,
73			const float* iBuffer,
74			const float* qBuffer,
75			const float scalar,
76			unsigned int num_points)
77			{
78		2	unsigned int number = 0;
79		2	const float* iBufferPtr = iBuffer;
80		2	const float* qBufferPtr = qBuffer;
81
82		2	__m256 vScalar = _mm256_set1_ps(scalar);
83
84		2	const unsigned int eighthPoints = num_points / 8;
85
86			__m256 iValue, qValue, cplxValue1, cplxValue2;
87			__m256i intValue1, intValue2;
88
89		2	int16_t* complexVectorPtr = (int16_t*)complexVector;
90
91	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
92		32766	iValue = _mm256_load_ps(iBufferPtr);
93		32766	qValue = _mm256_load_ps(qBufferPtr);
94
95			// Interleaves the lower two values in the i and q variables into one buffer
96		32766	cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
97		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
98
99			// Interleaves the upper two values in the i and q variables into one buffer
100		32766	cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
101		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
102
103		32766	intValue1 = _mm256_cvtps_epi32(cplxValue1);
104		32766	intValue2 = _mm256_cvtps_epi32(cplxValue2);
105
106		32766	intValue1 = _mm256_packs_epi32(intValue1, intValue2);
107
108			_mm256_store_si256((__m256i*)complexVectorPtr, intValue1);
109		32766	complexVectorPtr += 16;
110
111		32766	iBufferPtr += 8;
112		32766	qBufferPtr += 8;
113			}
114
115		2	number = eighthPoints * 8;
116		2	complexVectorPtr = (int16_t*)(&complexVector[number]);
117	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
118		14	*complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
119		14	*complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
120			}
121		2	}
122			#endif /* LV_HAVE_AVX2 */
123
124
125			#ifdef LV_HAVE_SSE2
126			#include <emmintrin.h>
127
128		2	static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* complexVector,
129			const float* iBuffer,
130			const float* qBuffer,
131			const float scalar,
132			unsigned int num_points)
133			{
134		2	unsigned int number = 0;
135		2	const float* iBufferPtr = iBuffer;
136		2	const float* qBufferPtr = qBuffer;
137
138		2	__m128 vScalar = _mm_set_ps1(scalar);
139
140		2	const unsigned int quarterPoints = num_points / 4;
141
142			__m128 iValue, qValue, cplxValue1, cplxValue2;
143			__m128i intValue1, intValue2;
144
145		2	int16_t* complexVectorPtr = (int16_t*)complexVector;
146
147	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
148		65534	iValue = _mm_load_ps(iBufferPtr);
149		65534	qValue = _mm_load_ps(qBufferPtr);
150
151			// Interleaves the lower two values in the i and q variables into one buffer
152		65534	cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
153		65534	cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
154
155			// Interleaves the upper two values in the i and q variables into one buffer
156		65534	cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
157		65534	cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
158
159		65534	intValue1 = _mm_cvtps_epi32(cplxValue1);
160		65534	intValue2 = _mm_cvtps_epi32(cplxValue2);
161
162		65534	intValue1 = _mm_packs_epi32(intValue1, intValue2);
163
164			_mm_store_si128((__m128i*)complexVectorPtr, intValue1);
165		65534	complexVectorPtr += 8;
166
167		65534	iBufferPtr += 4;
168		65534	qBufferPtr += 4;
169			}
170
171		2	number = quarterPoints * 4;
172		2	complexVectorPtr = (int16_t*)(&complexVector[number]);
173	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
174		6	*complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
175		6	*complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
176			}
177		2	}
178			#endif /* LV_HAVE_SSE2 */
179
180
181			#ifdef LV_HAVE_SSE
182			#include <xmmintrin.h>
183
184		2	static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* complexVector,
185			const float* iBuffer,
186			const float* qBuffer,
187			const float scalar,
188			unsigned int num_points)
189			{
190		2	unsigned int number = 0;
191		2	const float* iBufferPtr = iBuffer;
192		2	const float* qBufferPtr = qBuffer;
193
194		2	__m128 vScalar = _mm_set_ps1(scalar);
195
196		2	const unsigned int quarterPoints = num_points / 4;
197
198			__m128 iValue, qValue, cplxValue;
199
200		2	int16_t* complexVectorPtr = (int16_t*)complexVector;
201
202			__VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
203
204	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
205		65534	iValue = _mm_load_ps(iBufferPtr);
206		65534	qValue = _mm_load_ps(qBufferPtr);
207
208			// Interleaves the lower two values in the i and q variables into one buffer
209		65534	cplxValue = _mm_unpacklo_ps(iValue, qValue);
210		65534	cplxValue = _mm_mul_ps(cplxValue, vScalar);
211
212			_mm_store_ps(floatBuffer, cplxValue);
213
214		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
215		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
216		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
217		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
218
219			// Interleaves the upper two values in the i and q variables into one buffer
220		65534	cplxValue = _mm_unpackhi_ps(iValue, qValue);
221		65534	cplxValue = _mm_mul_ps(cplxValue, vScalar);
222
223			_mm_store_ps(floatBuffer, cplxValue);
224
225		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
226		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
227		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
228		65534	*complexVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
229
230		65534	iBufferPtr += 4;
231		65534	qBufferPtr += 4;
232			}
233
234		2	number = quarterPoints * 4;
235		2	complexVectorPtr = (int16_t*)(&complexVector[number]);
236	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
237		6	*complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
238		6	*complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
239			}
240		2	}
241			#endif /* LV_HAVE_SSE */
242
243
244			#ifdef LV_HAVE_GENERIC
245
246		2	static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* complexVector,
247			const float* iBuffer,
248			const float* qBuffer,
249			const float scalar,
250			unsigned int num_points)
251			{
252		2	int16_t* complexVectorPtr = (int16_t*)complexVector;
253		2	const float* iBufferPtr = iBuffer;
254		2	const float* qBufferPtr = qBuffer;
255		2	unsigned int number = 0;
256
257	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
258		262142	*complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
259		262142	*complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
260			}
261		2	}
262			#endif /* LV_HAVE_GENERIC */
263
264
265			#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
266
267			#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
268			#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H
269
270			#include <inttypes.h>
271			#include <stdio.h>
272			#include <volk/volk_common.h>
273
274			#ifdef LV_HAVE_AVX2
275			#include <immintrin.h>
276
277		2	static inline void volk_32f_x2_s32f_interleave_16ic_u_avx2(lv_16sc_t* complexVector,
278			const float* iBuffer,
279			const float* qBuffer,
280			const float scalar,
281			unsigned int num_points)
282			{
283		2	unsigned int number = 0;
284		2	const float* iBufferPtr = iBuffer;
285		2	const float* qBufferPtr = qBuffer;
286
287		2	__m256 vScalar = _mm256_set1_ps(scalar);
288
289		2	const unsigned int eighthPoints = num_points / 8;
290
291			__m256 iValue, qValue, cplxValue1, cplxValue2;
292			__m256i intValue1, intValue2;
293
294		2	int16_t* complexVectorPtr = (int16_t*)complexVector;
295
296	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
297		32766	iValue = _mm256_loadu_ps(iBufferPtr);
298		32766	qValue = _mm256_loadu_ps(qBufferPtr);
299
300			// Interleaves the lower two values in the i and q variables into one buffer
301		32766	cplxValue1 = _mm256_unpacklo_ps(iValue, qValue);
302		32766	cplxValue1 = _mm256_mul_ps(cplxValue1, vScalar);
303
304			// Interleaves the upper two values in the i and q variables into one buffer
305		32766	cplxValue2 = _mm256_unpackhi_ps(iValue, qValue);
306		32766	cplxValue2 = _mm256_mul_ps(cplxValue2, vScalar);
307
308		32766	intValue1 = _mm256_cvtps_epi32(cplxValue1);
309		32766	intValue2 = _mm256_cvtps_epi32(cplxValue2);
310
311		32766	intValue1 = _mm256_packs_epi32(intValue1, intValue2);
312
313			_mm256_storeu_si256((__m256i*)complexVectorPtr, intValue1);
314		32766	complexVectorPtr += 16;
315
316		32766	iBufferPtr += 8;
317		32766	qBufferPtr += 8;
318			}
319
320		2	number = eighthPoints * 8;
321		2	complexVectorPtr = (int16_t*)(&complexVector[number]);
322	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
323		14	*complexVectorPtr++ = (int16_t)rintf(*iBufferPtr++ * scalar);
324		14	*complexVectorPtr++ = (int16_t)rintf(*qBufferPtr++ * scalar);
325			}
326		2	}
327			#endif /* LV_HAVE_AVX2 */
328
329
330			#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_u_H */
331