GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_16ic_convert_32fc.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	107	107	100.0%
Functions:	7	7	100.0%
Branches:	24	26	92.3%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2016 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_16ic_convert_32fc
    
       *
    
       * \b Overview
    
       *
    
       * Converts a complex vector of 16-bits integer each component
    
       * into a complex vector of 32-bits float each component.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_16ic_convert_32fc(lv_32fc_t* outputVector, const lv_16sc_t* inputVector,
    
       * unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputVector:  The complex 16-bit integer input data buffer.
    
       * \li num_points:   The number of data values to be converted.
    
       *
    
       * \b Outputs
    
       * \li outputVector: pointer to a vector holding the converted vector.
    
       *
    
       * \b Example
    
       * \code
    
       * int N = 10000;
    
       *
    
       * unsigned int alignment = volk_get_alignment();
    
       * lv_16sc_t* input  = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
    
       * lv_32fc_t* output  = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
    
       * volk_16ic_convert_32fc(output, input, N);
    
       *
    
       * volk_free(input);
    
       * volk_free(output);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_16ic_convert_32fc_a_H
    
      #define INCLUDED_volk_16ic_convert_32fc_a_H
    
      #include <volk/volk_complex.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_a_avx2(lv_32fc_t* outputVector,
    
                                                       const lv_16sc_t* inputVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          const unsigned int avx_iters = num_points / 8;
    
      2
          unsigned int number = 0;
    
      2
          const int16_t* complexVectorPtr = (int16_t*)inputVector;
    
      2
          float* outputVectorPtr = (float*)outputVector;
    
          __m256 outVal;
    
          __m256i outValInt;
    
          __m128i cplxValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (number = 0; number < avx_iters; number++) {
    
      32766
              cplxValue = _mm_load_si128((__m128i*)complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              outValInt = _mm256_cvtepi16_epi32(cplxValue);
    
      32766
              outVal = _mm256_cvtepi32_ps(outValInt);
    
              _mm256_store_ps((float*)outputVectorPtr, outVal);
    
      32766
              outputVectorPtr += 8;
    
          }
    
      2
          number = avx_iters * 8;
    
        2/2✓ Branch 0 taken 262156 times.
✓ Branch 1 taken 2 times.

      262158
          for (; number < num_points * 2; number++) {
    
      262156
              *outputVectorPtr++ = (float)*complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_16ic_convert_32fc_generic(lv_32fc_t* outputVector,
    
                                                        const lv_16sc_t* inputVector,
    
                                                        unsigned int num_points)
    
      {
    
          unsigned int i;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (i = 0; i < num_points; i++) {
    
      262142
              outputVector[i] =
    
      262142
                  lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i]));
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector,
    
                                                       const lv_16sc_t* inputVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          const unsigned int sse_iters = num_points / 2;
    
      2
          const lv_16sc_t* _in = inputVector;
    
      2
          lv_32fc_t* _out = outputVector;
    
          __m128 a;
    
          unsigned int number;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (number = 0; number < sse_iters; number++) {
    
      131070
              a = _mm_set_ps(
    
      131070
                  (float)(lv_cimag(_in[1])),
    
      131070
                  (float)(lv_creal(_in[1])),
    
      131070
                  (float)(lv_cimag(_in[0])),
    
      131070
                  (float)(lv_creal(
    
                      _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
    
              _mm_store_ps((float*)_out, a);
    
      131070
              _in += 2;
    
      131070
              _out += 2;
    
          }
    
        1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.

      2
          if (num_points & 1) {
    
      2
              *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
    
      2
              _in++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_a_avx(lv_32fc_t* outputVector,
    
                                                      const lv_16sc_t* inputVector,
    
                                                      unsigned int num_points)
    
      {
    
      2
          const unsigned int sse_iters = num_points / 4;
    
      2
          const lv_16sc_t* _in = inputVector;
    
      2
          lv_32fc_t* _out = outputVector;
    
          __m256 a;
    
          unsigned int i, number;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (number = 0; number < sse_iters; number++) {
    
      65534
              a = _mm256_set_ps(
    
      65534
                  (float)(lv_cimag(_in[3])),
    
      65534
                  (float)(lv_creal(_in[3])),
    
      65534
                  (float)(lv_cimag(_in[2])),
    
      65534
                  (float)(lv_creal(_in[2])),
    
      65534
                  (float)(lv_cimag(_in[1])),
    
      65534
                  (float)(lv_creal(_in[1])),
    
      65534
                  (float)(lv_cimag(_in[0])),
    
      65534
                  (float)(lv_creal(
    
                      _in[0]))); // load (2 byte imag, 2 byte real) x 4 into 256 bits reg
    
              _mm256_store_ps((float*)_out, a);
    
      65534
              _in += 4;
    
      65534
              _out += 4;
    
          }
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (i = 0; i < (num_points % 4); ++i) {
    
      6
              *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
    
      6
              _in++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
    
                                                     const lv_16sc_t* inputVector,
    
                                                     unsigned int num_points)
    
      {
    
          const unsigned int sse_iters = num_points / 2;
    
          const lv_16sc_t* _in = inputVector;
    
          lv_32fc_t* _out = outputVector;
    
          int16x4_t a16x4;
    
          int32x4_t a32x4;
    
          float32x4_t f32x4;
    
          unsigned int i, number;
    
          for (number = 0; number < sse_iters; number++) {
    
              a16x4 = vld1_s16((const int16_t*)_in);
    
              __VOLK_PREFETCH(_in + 4);
    
              a32x4 = vmovl_s16(a16x4);
    
              f32x4 = vcvtq_f32_s32(a32x4);
    
              vst1q_f32((float32_t*)_out, f32x4);
    
              _in += 2;
    
              _out += 2;
    
          }
    
          for (i = 0; i < (num_points % 2); ++i) {
    
              *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
    
              _in++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #endif /* INCLUDED_volk_16ic_convert_32fc_a_H */
    
      #ifndef INCLUDED_volk_16ic_convert_32fc_u_H
    
      #define INCLUDED_volk_16ic_convert_32fc_u_H
    
      #include <volk/volk_complex.h>
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_u_avx2(lv_32fc_t* outputVector,
    
                                                       const lv_16sc_t* inputVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          const unsigned int avx_iters = num_points / 8;
    
      2
          unsigned int number = 0;
    
      2
          const int16_t* complexVectorPtr = (int16_t*)inputVector;
    
      2
          float* outputVectorPtr = (float*)outputVector;
    
          __m256 outVal;
    
          __m256i outValInt;
    
          __m128i cplxValue;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (number = 0; number < avx_iters; number++) {
    
      32766
              cplxValue = _mm_loadu_si128((__m128i*)complexVectorPtr);
    
      32766
              complexVectorPtr += 8;
    
      32766
              outValInt = _mm256_cvtepi16_epi32(cplxValue);
    
      32766
              outVal = _mm256_cvtepi32_ps(outValInt);
    
              _mm256_storeu_ps((float*)outputVectorPtr, outVal);
    
      32766
              outputVectorPtr += 8;
    
          }
    
      2
          number = avx_iters * 8;
    
        2/2✓ Branch 0 taken 262156 times.
✓ Branch 1 taken 2 times.

      262158
          for (; number < num_points * 2; number++) {
    
      262156
              *outputVectorPtr++ = (float)*complexVectorPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector,
    
                                                       const lv_16sc_t* inputVector,
    
                                                       unsigned int num_points)
    
      {
    
      2
          const unsigned int sse_iters = num_points / 2;
    
      2
          const lv_16sc_t* _in = inputVector;
    
      2
          lv_32fc_t* _out = outputVector;
    
          __m128 a;
    
          unsigned int number;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (number = 0; number < sse_iters; number++) {
    
      131070
              a = _mm_set_ps(
    
      131070
                  (float)(lv_cimag(_in[1])),
    
      131070
                  (float)(lv_creal(_in[1])),
    
      131070
                  (float)(lv_cimag(_in[0])),
    
      131070
                  (float)(lv_creal(
    
                      _in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
    
              _mm_storeu_ps((float*)_out, a);
    
      131070
              _in += 2;
    
      131070
              _out += 2;
    
          }
    
        1/2✓ Branch 0 taken 2 times.
✗ Branch 1 not taken.

      2
          if (num_points & 1) {
    
      2
              *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
    
      2
              _in++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_16ic_convert_32fc_u_avx(lv_32fc_t* outputVector,
    
                                                      const lv_16sc_t* inputVector,
    
                                                      unsigned int num_points)
    
      {
    
      2
          const unsigned int sse_iters = num_points / 4;
    
      2
          const lv_16sc_t* _in = inputVector;
    
      2
          lv_32fc_t* _out = outputVector;
    
          __m256 a;
    
          unsigned int i, number;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (number = 0; number < sse_iters; number++) {
    
      65534
              a = _mm256_set_ps(
    
      65534
                  (float)(lv_cimag(_in[3])),
    
      65534
                  (float)(lv_creal(_in[3])),
    
      65534
                  (float)(lv_cimag(_in[2])),
    
      65534
                  (float)(lv_creal(_in[2])),
    
      65534
                  (float)(lv_cimag(_in[1])),
    
      65534
                  (float)(lv_creal(_in[1])),
    
      65534
                  (float)(lv_cimag(_in[0])),
    
      65534
                  (float)(lv_creal(
    
                      _in[0]))); // load (2 byte imag, 2 byte real) x 4 into 256 bits reg
    
              _mm256_storeu_ps((float*)_out, a);
    
      65534
              _in += 4;
    
      65534
              _out += 4;
    
          }
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (i = 0; i < (num_points % 4); ++i) {
    
      6
              *_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
    
      6
              _in++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_16ic_convert_32fc_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2016 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_16ic_convert_32fc
12			*
13			* \b Overview
14			*
15			* Converts a complex vector of 16-bits integer each component
16			* into a complex vector of 32-bits float each component.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_16ic_convert_32fc(lv_32fc_t* outputVector, const lv_16sc_t* inputVector,
21			* unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li inputVector: The complex 16-bit integer input data buffer.
25			* \li num_points: The number of data values to be converted.
26			*
27			* \b Outputs
28			* \li outputVector: pointer to a vector holding the converted vector.
29			*
30			* \b Example
31			* \code
32			* int N = 10000;
33			*
34			* unsigned int alignment = volk_get_alignment();
35			* lv_16sc_t* input = (lv_16sc_t*)volk_malloc(sizeof(lv_16sc_t)*N, alignment);
36			* lv_32fc_t* output = (lv_32fc_t*)volk_malloc(sizeof(lv_32fc_t)*N, alignment);
37			* volk_16ic_convert_32fc(output, input, N);
38			*
39			* volk_free(input);
40			* volk_free(output);
41			* \endcode
42			*/
43
44
45			#ifndef INCLUDED_volk_16ic_convert_32fc_a_H
46			#define INCLUDED_volk_16ic_convert_32fc_a_H
47
48			#include <volk/volk_complex.h>
49
50			#ifdef LV_HAVE_AVX2
51			#include <immintrin.h>
52
53		2	static inline void volk_16ic_convert_32fc_a_avx2(lv_32fc_t* outputVector,
54			const lv_16sc_t* inputVector,
55			unsigned int num_points)
56			{
57		2	const unsigned int avx_iters = num_points / 8;
58		2	unsigned int number = 0;
59		2	const int16_t* complexVectorPtr = (int16_t*)inputVector;
60		2	float* outputVectorPtr = (float*)outputVector;
61			__m256 outVal;
62			__m256i outValInt;
63			__m128i cplxValue;
64
65	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (number = 0; number < avx_iters; number++) {
66		32766	cplxValue = _mm_load_si128((__m128i*)complexVectorPtr);
67		32766	complexVectorPtr += 8;
68
69		32766	outValInt = _mm256_cvtepi16_epi32(cplxValue);
70		32766	outVal = _mm256_cvtepi32_ps(outValInt);
71			_mm256_store_ps((float*)outputVectorPtr, outVal);
72
73		32766	outputVectorPtr += 8;
74			}
75
76		2	number = avx_iters * 8;
77	2/2 ✓ Branch 0 taken 262156 times. ✓ Branch 1 taken 2 times.	262158	for (; number < num_points * 2; number++) {
78		262156	*outputVectorPtr++ = (float)*complexVectorPtr++;
79			}
80		2	}
81
82			#endif /* LV_HAVE_AVX2 */
83
84			#ifdef LV_HAVE_GENERIC
85
86		2	static inline void volk_16ic_convert_32fc_generic(lv_32fc_t* outputVector,
87			const lv_16sc_t* inputVector,
88			unsigned int num_points)
89			{
90			unsigned int i;
91	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (i = 0; i < num_points; i++) {
92		262142	outputVector[i] =
93		262142	lv_cmake((float)lv_creal(inputVector[i]), (float)lv_cimag(inputVector[i]));
94			}
95		2	}
96
97			#endif /* LV_HAVE_GENERIC */
98
99
100			#ifdef LV_HAVE_SSE2
101			#include <emmintrin.h>
102
103		2	static inline void volk_16ic_convert_32fc_a_sse2(lv_32fc_t* outputVector,
104			const lv_16sc_t* inputVector,
105			unsigned int num_points)
106			{
107		2	const unsigned int sse_iters = num_points / 2;
108
109		2	const lv_16sc_t* _in = inputVector;
110		2	lv_32fc_t* _out = outputVector;
111			__m128 a;
112			unsigned int number;
113
114	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (number = 0; number < sse_iters; number++) {
115		131070	a = _mm_set_ps(
116		131070	(float)(lv_cimag(_in[1])),
117		131070	(float)(lv_creal(_in[1])),
118		131070	(float)(lv_cimag(_in[0])),
119		131070	(float)(lv_creal(
120			_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
121			_mm_store_ps((float*)_out, a);
122		131070	_in += 2;
123		131070	_out += 2;
124			}
125	1/2 ✓ Branch 0 taken 2 times. ✗ Branch 1 not taken.	2	if (num_points & 1) {
126		2	*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
127		2	_in++;
128			}
129		2	}
130
131			#endif /* LV_HAVE_SSE2 */
132
133			#ifdef LV_HAVE_AVX
134			#include <immintrin.h>
135
136		2	static inline void volk_16ic_convert_32fc_a_avx(lv_32fc_t* outputVector,
137			const lv_16sc_t* inputVector,
138			unsigned int num_points)
139			{
140		2	const unsigned int sse_iters = num_points / 4;
141
142		2	const lv_16sc_t* _in = inputVector;
143		2	lv_32fc_t* _out = outputVector;
144			__m256 a;
145			unsigned int i, number;
146
147	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (number = 0; number < sse_iters; number++) {
148		65534	a = _mm256_set_ps(
149		65534	(float)(lv_cimag(_in[3])),
150		65534	(float)(lv_creal(_in[3])),
151		65534	(float)(lv_cimag(_in[2])),
152		65534	(float)(lv_creal(_in[2])),
153		65534	(float)(lv_cimag(_in[1])),
154		65534	(float)(lv_creal(_in[1])),
155		65534	(float)(lv_cimag(_in[0])),
156		65534	(float)(lv_creal(
157			_in[0]))); // load (2 byte imag, 2 byte real) x 4 into 256 bits reg
158			_mm256_store_ps((float*)_out, a);
159		65534	_in += 4;
160		65534	_out += 4;
161			}
162
163	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (i = 0; i < (num_points % 4); ++i) {
164		6	*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
165		6	_in++;
166			}
167		2	}
168
169			#endif /* LV_HAVE_AVX */
170
171
172			#ifdef LV_HAVE_NEON
173			#include <arm_neon.h>
174
175			static inline void volk_16ic_convert_32fc_neon(lv_32fc_t* outputVector,
176			const lv_16sc_t* inputVector,
177			unsigned int num_points)
178			{
179			const unsigned int sse_iters = num_points / 2;
180
181			const lv_16sc_t* _in = inputVector;
182			lv_32fc_t* _out = outputVector;
183
184			int16x4_t a16x4;
185			int32x4_t a32x4;
186			float32x4_t f32x4;
187			unsigned int i, number;
188
189			for (number = 0; number < sse_iters; number++) {
190			a16x4 = vld1_s16((const int16_t*)_in);
191			__VOLK_PREFETCH(_in + 4);
192			a32x4 = vmovl_s16(a16x4);
193			f32x4 = vcvtq_f32_s32(a32x4);
194			vst1q_f32((float32_t*)_out, f32x4);
195			_in += 2;
196			_out += 2;
197			}
198			for (i = 0; i < (num_points % 2); ++i) {
199			*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
200			_in++;
201			}
202			}
203			#endif /* LV_HAVE_NEON */
204
205			#endif /* INCLUDED_volk_16ic_convert_32fc_a_H */
206
207			#ifndef INCLUDED_volk_16ic_convert_32fc_u_H
208			#define INCLUDED_volk_16ic_convert_32fc_u_H
209
210			#include <volk/volk_complex.h>
211
212
213			#ifdef LV_HAVE_AVX2
214			#include <immintrin.h>
215
216		2	static inline void volk_16ic_convert_32fc_u_avx2(lv_32fc_t* outputVector,
217			const lv_16sc_t* inputVector,
218			unsigned int num_points)
219			{
220		2	const unsigned int avx_iters = num_points / 8;
221		2	unsigned int number = 0;
222		2	const int16_t* complexVectorPtr = (int16_t*)inputVector;
223		2	float* outputVectorPtr = (float*)outputVector;
224			__m256 outVal;
225			__m256i outValInt;
226			__m128i cplxValue;
227
228	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (number = 0; number < avx_iters; number++) {
229		32766	cplxValue = _mm_loadu_si128((__m128i*)complexVectorPtr);
230		32766	complexVectorPtr += 8;
231
232		32766	outValInt = _mm256_cvtepi16_epi32(cplxValue);
233		32766	outVal = _mm256_cvtepi32_ps(outValInt);
234			_mm256_storeu_ps((float*)outputVectorPtr, outVal);
235
236		32766	outputVectorPtr += 8;
237			}
238
239		2	number = avx_iters * 8;
240	2/2 ✓ Branch 0 taken 262156 times. ✓ Branch 1 taken 2 times.	262158	for (; number < num_points * 2; number++) {
241		262156	*outputVectorPtr++ = (float)*complexVectorPtr++;
242			}
243		2	}
244
245			#endif /* LV_HAVE_AVX2 */
246
247			#ifdef LV_HAVE_SSE2
248			#include <emmintrin.h>
249
250		2	static inline void volk_16ic_convert_32fc_u_sse2(lv_32fc_t* outputVector,
251			const lv_16sc_t* inputVector,
252			unsigned int num_points)
253			{
254		2	const unsigned int sse_iters = num_points / 2;
255
256		2	const lv_16sc_t* _in = inputVector;
257		2	lv_32fc_t* _out = outputVector;
258			__m128 a;
259			unsigned int number;
260
261	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (number = 0; number < sse_iters; number++) {
262		131070	a = _mm_set_ps(
263		131070	(float)(lv_cimag(_in[1])),
264		131070	(float)(lv_creal(_in[1])),
265		131070	(float)(lv_cimag(_in[0])),
266		131070	(float)(lv_creal(
267			_in[0]))); // //load (2 byte imag, 2 byte real) x 2 into 128 bits reg
268			_mm_storeu_ps((float*)_out, a);
269		131070	_in += 2;
270		131070	_out += 2;
271			}
272	1/2 ✓ Branch 0 taken 2 times. ✗ Branch 1 not taken.	2	if (num_points & 1) {
273		2	*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
274		2	_in++;
275			}
276		2	}
277
278			#endif /* LV_HAVE_SSE2 */
279
280
281			#ifdef LV_HAVE_AVX
282			#include <immintrin.h>
283
284		2	static inline void volk_16ic_convert_32fc_u_avx(lv_32fc_t* outputVector,
285			const lv_16sc_t* inputVector,
286			unsigned int num_points)
287			{
288		2	const unsigned int sse_iters = num_points / 4;
289
290		2	const lv_16sc_t* _in = inputVector;
291		2	lv_32fc_t* _out = outputVector;
292			__m256 a;
293			unsigned int i, number;
294
295	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (number = 0; number < sse_iters; number++) {
296		65534	a = _mm256_set_ps(
297		65534	(float)(lv_cimag(_in[3])),
298		65534	(float)(lv_creal(_in[3])),
299		65534	(float)(lv_cimag(_in[2])),
300		65534	(float)(lv_creal(_in[2])),
301		65534	(float)(lv_cimag(_in[1])),
302		65534	(float)(lv_creal(_in[1])),
303		65534	(float)(lv_cimag(_in[0])),
304		65534	(float)(lv_creal(
305			_in[0]))); // load (2 byte imag, 2 byte real) x 4 into 256 bits reg
306			_mm256_storeu_ps((float*)_out, a);
307		65534	_in += 4;
308		65534	_out += 4;
309			}
310
311	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (i = 0; i < (num_points % 4); ++i) {
312		6	*_out++ = lv_cmake((float)lv_creal(*_in), (float)lv_cimag(*_in));
313		6	_in++;
314			}
315		2	}
316
317			#endif /* LV_HAVE_AVX */
318			#endif /* INCLUDED_volk_16ic_convert_32fc_u_H */
319