GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32i_s32f_convert_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	84	118	71.2%
Functions:	6	8	75.0%
Branches:	20	28	71.4%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32i_s32f_convert_32f
    
       *
    
       * \b Overview
    
       *
    
       * Converts the samples in the inputVector from 32-bit integers into
    
       * floating point values and then divides them by the input scalar.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32i_s32f_convert_32f(float* outputVector, const int32_t* inputVector, const
    
       * float scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputVector: The vector of 32-bit integers.
    
       * \li scalar: The value that the output is divided by after being converted to a float.
    
       * \li num_points: The number of values.
    
       *
    
       * \b Outputs
    
       * \li outputVector: The output vector of floats.
    
       *
    
       * \b Example
    
       * Convert full-range integers to floats in range [0,1].
    
       * \code
    
       *   int N = 1<<8;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *
    
       *   int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
    
       *   float* z = (float*)volk_malloc(N*sizeof(float), alignment);
    
       *   float scale = (float)N;
    
       *   for(unsigned int ii=0; ii<N; ++ii){
    
       *       x[ii] = ii;
    
       *   }
    
       *
    
       *   volk_32i_s32f_convert_32f(z, x, scale, N);
    
       *
    
       *   volk_free(x);
    
       *   volk_free(z);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
    
      #define INCLUDED_volk_32i_s32f_convert_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
    
                                                             const int32_t* inputVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int onesixteenthPoints = num_points / 16;
    
      ✗
          float* outputVectorPtr = outputVector;
    
      ✗
          const float iScalar = 1.0 / scalar;
    
      ✗
          __m512 invScalar = _mm512_set1_ps(iScalar);
    
      ✗
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m512i inputVal;
    
          __m512 ret;
    
      ✗
          for (; number < onesixteenthPoints; number++) {
    
              // Load the values
    
      ✗
              inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
    
      ✗
              ret = _mm512_cvtepi32_ps(inputVal);
    
      ✗
              ret = _mm512_mul_ps(ret, invScalar);
    
              _mm512_storeu_ps(outputVectorPtr, ret);
    
      ✗
              outputVectorPtr += 16;
    
      ✗
              inputPtr += 16;
    
          }
    
      ✗
          number = onesixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
    
                                                          const int32_t* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int oneEightPoints = num_points / 8;
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m256 invScalar = _mm256_set1_ps(iScalar);
    
      2
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m256i inputVal;
    
          __m256 ret;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < oneEightPoints; number++) {
    
              // Load the 4 values
    
      32766
              inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
    
      32766
              ret = _mm256_cvtepi32_ps(inputVal);
    
      32766
              ret = _mm256_mul_ps(ret, invScalar);
    
              _mm256_storeu_ps(outputVectorPtr, ret);
    
      32766
              outputVectorPtr += 8;
    
      32766
              inputPtr += 8;
    
          }
    
      2
          number = oneEightPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
    
                                                          const int32_t* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m128 invScalar = _mm_set_ps1(iScalar);
    
      2
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m128i inputVal;
    
          __m128 ret;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
              // Load the 4 values
    
      65534
              inputVal = _mm_loadu_si128((__m128i*)inputPtr);
    
      65534
              ret = _mm_cvtepi32_ps(inputVal);
    
      65534
              ret = _mm_mul_ps(ret, invScalar);
    
              _mm_storeu_ps(outputVectorPtr, ret);
    
      65534
              outputVectorPtr += 4;
    
      65534
              inputPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
    
                                                           const int32_t* inputVector,
    
                                                           const float scalar,
    
                                                           unsigned int num_points)
    
      {
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const int32_t* inputVectorPtr = inputVector;
    
      2
          unsigned int number = 0;
    
      2
          const float iScalar = 1.0 / scalar;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
    
      #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
    
      #define INCLUDED_volk_32i_s32f_convert_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
    
                                                             const int32_t* inputVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int onesixteenthPoints = num_points / 16;
    
      ✗
          float* outputVectorPtr = outputVector;
    
      ✗
          const float iScalar = 1.0 / scalar;
    
      ✗
          __m512 invScalar = _mm512_set1_ps(iScalar);
    
      ✗
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m512i inputVal;
    
          __m512 ret;
    
      ✗
          for (; number < onesixteenthPoints; number++) {
    
              // Load the values
    
      ✗
              inputVal = _mm512_load_si512((__m512i*)inputPtr);
    
      ✗
              ret = _mm512_cvtepi32_ps(inputVal);
    
      ✗
              ret = _mm512_mul_ps(ret, invScalar);
    
              _mm512_store_ps(outputVectorPtr, ret);
    
      ✗
              outputVectorPtr += 16;
    
      ✗
              inputPtr += 16;
    
          }
    
      ✗
          number = onesixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX2
    
      #include <immintrin.h>
    
      2
      static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
    
                                                          const int32_t* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int oneEightPoints = num_points / 8;
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m256 invScalar = _mm256_set1_ps(iScalar);
    
      2
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m256i inputVal;
    
          __m256 ret;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < oneEightPoints; number++) {
    
              // Load the 4 values
    
      32766
              inputVal = _mm256_load_si256((__m256i*)inputPtr);
    
      32766
              ret = _mm256_cvtepi32_ps(inputVal);
    
      32766
              ret = _mm256_mul_ps(ret, invScalar);
    
              _mm256_store_ps(outputVectorPtr, ret);
    
      32766
              outputVectorPtr += 8;
    
      32766
              inputPtr += 8;
    
          }
    
      2
          number = oneEightPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX2 */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
    
                                                          const int32_t* inputVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const float iScalar = 1.0 / scalar;
    
      2
          __m128 invScalar = _mm_set_ps1(iScalar);
    
      2
          int32_t* inputPtr = (int32_t*)inputVector;
    
          __m128i inputVal;
    
          __m128 ret;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
              // Load the 4 values
    
      65534
              inputVal = _mm_load_si128((__m128i*)inputPtr);
    
      65534
              ret = _mm_cvtepi32_ps(inputVal);
    
      65534
              ret = _mm_mul_ps(ret, invScalar);
    
              _mm_store_ps(outputVectorPtr, ret);
    
      65534
              outputVectorPtr += 4;
    
      65534
              inputPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              outputVector[number] = ((float)(inputVector[number])) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
    
                                                             const int32_t* inputVector,
    
                                                             const float scalar,
    
                                                             unsigned int num_points)
    
      {
    
      2
          float* outputVectorPtr = outputVector;
    
      2
          const int32_t* inputVectorPtr = inputVector;
    
      2
          unsigned int number = 0;
    
      2
          const float iScalar = 1.0 / scalar;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32i_s32f_convert_32f
12			*
13			* \b Overview
14			*
15			* Converts the samples in the inputVector from 32-bit integers into
16			* floating point values and then divides them by the input scalar.
17			*
18			* <b>Dispatcher Prototype</b>
19			* \code
20			* void volk_32i_s32f_convert_32f(float* outputVector, const int32_t* inputVector, const
21			* float scalar, unsigned int num_points) \endcode
22			*
23			* \b Inputs
24			* \li inputVector: The vector of 32-bit integers.
25			* \li scalar: The value that the output is divided by after being converted to a float.
26			* \li num_points: The number of values.
27			*
28			* \b Outputs
29			* \li outputVector: The output vector of floats.
30			*
31			* \b Example
32			* Convert full-range integers to floats in range [0,1].
33			* \code
34			* int N = 1<<8;
35			* unsigned int alignment = volk_get_alignment();
36			*
37			* int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
38			* float* z = (float*)volk_malloc(N*sizeof(float), alignment);
39			* float scale = (float)N;
40			* for(unsigned int ii=0; ii<N; ++ii){
41			* x[ii] = ii;
42			* }
43			*
44			* volk_32i_s32f_convert_32f(z, x, scale, N);
45			*
46			* volk_free(x);
47			* volk_free(z);
48			* \endcode
49			*/
50
51			#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52			#define INCLUDED_volk_32i_s32f_convert_32f_u_H
53
54			#include <inttypes.h>
55			#include <stdio.h>
56
57			#ifdef LV_HAVE_AVX512F
58			#include <immintrin.h>
59
60		✗	static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
61			const int32_t* inputVector,
62			const float scalar,
63			unsigned int num_points)
64			{
65		✗	unsigned int number = 0;
66		✗	const unsigned int onesixteenthPoints = num_points / 16;
67
68		✗	float* outputVectorPtr = outputVector;
69		✗	const float iScalar = 1.0 / scalar;
70		✗	__m512 invScalar = _mm512_set1_ps(iScalar);
71		✗	int32_t* inputPtr = (int32_t*)inputVector;
72			__m512i inputVal;
73			__m512 ret;
74
75		✗	for (; number < onesixteenthPoints; number++) {
76			// Load the values
77		✗	inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
78
79		✗	ret = _mm512_cvtepi32_ps(inputVal);
80		✗	ret = _mm512_mul_ps(ret, invScalar);
81
82			_mm512_storeu_ps(outputVectorPtr, ret);
83
84		✗	outputVectorPtr += 16;
85		✗	inputPtr += 16;
86			}
87
88		✗	number = onesixteenthPoints * 16;
89		✗	for (; number < num_points; number++) {
90		✗	outputVector[number] = ((float)(inputVector[number])) * iScalar;
91			}
92		✗	}
93			#endif /* LV_HAVE_AVX512F */
94
95
96			#ifdef LV_HAVE_AVX2
97			#include <immintrin.h>
98
99		2	static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
100			const int32_t* inputVector,
101			const float scalar,
102			unsigned int num_points)
103			{
104		2	unsigned int number = 0;
105		2	const unsigned int oneEightPoints = num_points / 8;
106
107		2	float* outputVectorPtr = outputVector;
108		2	const float iScalar = 1.0 / scalar;
109		2	__m256 invScalar = _mm256_set1_ps(iScalar);
110		2	int32_t* inputPtr = (int32_t*)inputVector;
111			__m256i inputVal;
112			__m256 ret;
113
114	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < oneEightPoints; number++) {
115			// Load the 4 values
116		32766	inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
117
118		32766	ret = _mm256_cvtepi32_ps(inputVal);
119		32766	ret = _mm256_mul_ps(ret, invScalar);
120
121			_mm256_storeu_ps(outputVectorPtr, ret);
122
123		32766	outputVectorPtr += 8;
124		32766	inputPtr += 8;
125			}
126
127		2	number = oneEightPoints * 8;
128	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
129		14	outputVector[number] = ((float)(inputVector[number])) * iScalar;
130			}
131		2	}
132			#endif /* LV_HAVE_AVX2 */
133
134
135			#ifdef LV_HAVE_SSE2
136			#include <emmintrin.h>
137
138		2	static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
139			const int32_t* inputVector,
140			const float scalar,
141			unsigned int num_points)
142			{
143		2	unsigned int number = 0;
144		2	const unsigned int quarterPoints = num_points / 4;
145
146		2	float* outputVectorPtr = outputVector;
147		2	const float iScalar = 1.0 / scalar;
148		2	__m128 invScalar = _mm_set_ps1(iScalar);
149		2	int32_t* inputPtr = (int32_t*)inputVector;
150			__m128i inputVal;
151			__m128 ret;
152
153	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
154			// Load the 4 values
155		65534	inputVal = _mm_loadu_si128((__m128i*)inputPtr);
156
157		65534	ret = _mm_cvtepi32_ps(inputVal);
158		65534	ret = _mm_mul_ps(ret, invScalar);
159
160			_mm_storeu_ps(outputVectorPtr, ret);
161
162		65534	outputVectorPtr += 4;
163		65534	inputPtr += 4;
164			}
165
166		2	number = quarterPoints * 4;
167	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
168		6	outputVector[number] = ((float)(inputVector[number])) * iScalar;
169			}
170		2	}
171			#endif /* LV_HAVE_SSE2 */
172
173
174			#ifdef LV_HAVE_GENERIC
175
176		2	static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
177			const int32_t* inputVector,
178			const float scalar,
179			unsigned int num_points)
180			{
181		2	float* outputVectorPtr = outputVector;
182		2	const int32_t* inputVectorPtr = inputVector;
183		2	unsigned int number = 0;
184		2	const float iScalar = 1.0 / scalar;
185
186	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
187		262142	*outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
188			}
189		2	}
190			#endif /* LV_HAVE_GENERIC */
191
192			#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
193
194
195			#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196			#define INCLUDED_volk_32i_s32f_convert_32f_a_H
197
198			#include <inttypes.h>
199			#include <stdio.h>
200
201			#ifdef LV_HAVE_AVX512F
202			#include <immintrin.h>
203
204		✗	static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
205			const int32_t* inputVector,
206			const float scalar,
207			unsigned int num_points)
208			{
209		✗	unsigned int number = 0;
210		✗	const unsigned int onesixteenthPoints = num_points / 16;
211
212		✗	float* outputVectorPtr = outputVector;
213		✗	const float iScalar = 1.0 / scalar;
214		✗	__m512 invScalar = _mm512_set1_ps(iScalar);
215		✗	int32_t* inputPtr = (int32_t*)inputVector;
216			__m512i inputVal;
217			__m512 ret;
218
219		✗	for (; number < onesixteenthPoints; number++) {
220			// Load the values
221		✗	inputVal = _mm512_load_si512((__m512i*)inputPtr);
222
223		✗	ret = _mm512_cvtepi32_ps(inputVal);
224		✗	ret = _mm512_mul_ps(ret, invScalar);
225
226			_mm512_store_ps(outputVectorPtr, ret);
227
228		✗	outputVectorPtr += 16;
229		✗	inputPtr += 16;
230			}
231
232		✗	number = onesixteenthPoints * 16;
233		✗	for (; number < num_points; number++) {
234		✗	outputVector[number] = ((float)(inputVector[number])) * iScalar;
235			}
236		✗	}
237			#endif /* LV_HAVE_AVX512F */
238
239			#ifdef LV_HAVE_AVX2
240			#include <immintrin.h>
241
242		2	static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
243			const int32_t* inputVector,
244			const float scalar,
245			unsigned int num_points)
246			{
247		2	unsigned int number = 0;
248		2	const unsigned int oneEightPoints = num_points / 8;
249
250		2	float* outputVectorPtr = outputVector;
251		2	const float iScalar = 1.0 / scalar;
252		2	__m256 invScalar = _mm256_set1_ps(iScalar);
253		2	int32_t* inputPtr = (int32_t*)inputVector;
254			__m256i inputVal;
255			__m256 ret;
256
257	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < oneEightPoints; number++) {
258			// Load the 4 values
259		32766	inputVal = _mm256_load_si256((__m256i*)inputPtr);
260
261		32766	ret = _mm256_cvtepi32_ps(inputVal);
262		32766	ret = _mm256_mul_ps(ret, invScalar);
263
264			_mm256_store_ps(outputVectorPtr, ret);
265
266		32766	outputVectorPtr += 8;
267		32766	inputPtr += 8;
268			}
269
270		2	number = oneEightPoints * 8;
271	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
272		14	outputVector[number] = ((float)(inputVector[number])) * iScalar;
273			}
274		2	}
275			#endif /* LV_HAVE_AVX2 */
276
277
278			#ifdef LV_HAVE_SSE2
279			#include <emmintrin.h>
280
281		2	static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
282			const int32_t* inputVector,
283			const float scalar,
284			unsigned int num_points)
285			{
286		2	unsigned int number = 0;
287		2	const unsigned int quarterPoints = num_points / 4;
288
289		2	float* outputVectorPtr = outputVector;
290		2	const float iScalar = 1.0 / scalar;
291		2	__m128 invScalar = _mm_set_ps1(iScalar);
292		2	int32_t* inputPtr = (int32_t*)inputVector;
293			__m128i inputVal;
294			__m128 ret;
295
296	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
297			// Load the 4 values
298		65534	inputVal = _mm_load_si128((__m128i*)inputPtr);
299
300		65534	ret = _mm_cvtepi32_ps(inputVal);
301		65534	ret = _mm_mul_ps(ret, invScalar);
302
303			_mm_store_ps(outputVectorPtr, ret);
304
305		65534	outputVectorPtr += 4;
306		65534	inputPtr += 4;
307			}
308
309		2	number = quarterPoints * 4;
310	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
311		6	outputVector[number] = ((float)(inputVector[number])) * iScalar;
312			}
313		2	}
314			#endif /* LV_HAVE_SSE2 */
315
316
317			#ifdef LV_HAVE_GENERIC
318
319		2	static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
320			const int32_t* inputVector,
321			const float scalar,
322			unsigned int num_points)
323			{
324		2	float* outputVectorPtr = outputVector;
325		2	const int32_t* inputVectorPtr = inputVector;
326		2	unsigned int number = 0;
327		2	const float iScalar = 1.0 / scalar;
328
329	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
330		262142	*outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
331			}
332		2	}
333			#endif /* LV_HAVE_GENERIC */
334
335
336			#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
337