GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32i_s32f_convert_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 84 118 71.2%
Functions: 6 8 75.0%
Branches: 20 28 71.4%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32i_s32f_convert_32f
12 *
13 * \b Overview
14 *
15 * Converts the samples in the inputVector from 32-bit integers into
16 * floating point values and then divides them by the input scalar.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32i_s32f_convert_32f(float* outputVector, const int32_t* inputVector, const
21 * float scalar, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li inputVector: The vector of 32-bit integers.
25 * \li scalar: The value that each sample is divided by after it has been converted to a float.
26 * \li num_points: The number of values.
27 *
28 * \b Outputs
29 * \li outputVector: The output vector of floats.
30 *
31 * \b Example
32 * Convert integers in the range [0, N) to floats in the range [0, 1).
33 * \code
34 * int N = 1<<8;
35 * unsigned int alignment = volk_get_alignment();
36 *
37 * int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment);
38 * float* z = (float*)volk_malloc(N*sizeof(float), alignment);
39 * float scale = (float)N;
40 * for(unsigned int ii=0; ii<N; ++ii){
41 * x[ii] = ii;
42 * }
43 *
44 * volk_32i_s32f_convert_32f(z, x, scale, N);
45 *
46 * volk_free(x);
47 * volk_free(z);
48 * \endcode
49 */
50
51 #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52 #define INCLUDED_volk_32i_s32f_convert_32f_u_H
53
54 #include <inttypes.h>
55 #include <stdio.h>
56
57 #ifdef LV_HAVE_AVX512F
58 #include <immintrin.h>
59
60 static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
61 const int32_t* inputVector,
62 const float scalar,
63 unsigned int num_points)
64 {
65 unsigned int number = 0;
66 const unsigned int onesixteenthPoints = num_points / 16;
67
68 float* outputVectorPtr = outputVector;
69 const float iScalar = 1.0 / scalar;
70 __m512 invScalar = _mm512_set1_ps(iScalar);
71 int32_t* inputPtr = (int32_t*)inputVector;
72 __m512i inputVal;
73 __m512 ret;
74
75 for (; number < onesixteenthPoints; number++) {
76 // Load the values
77 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
78
79 ret = _mm512_cvtepi32_ps(inputVal);
80 ret = _mm512_mul_ps(ret, invScalar);
81
82 _mm512_storeu_ps(outputVectorPtr, ret);
83
84 outputVectorPtr += 16;
85 inputPtr += 16;
86 }
87
88 number = onesixteenthPoints * 16;
89 for (; number < num_points; number++) {
90 outputVector[number] = ((float)(inputVector[number])) * iScalar;
91 }
92 }
93 #endif /* LV_HAVE_AVX512F */
94
95
96 #ifdef LV_HAVE_AVX2
97 #include <immintrin.h>
98
99 2 static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
100 const int32_t* inputVector,
101 const float scalar,
102 unsigned int num_points)
103 {
104 2 unsigned int number = 0;
105 2 const unsigned int oneEightPoints = num_points / 8;
106
107 2 float* outputVectorPtr = outputVector;
108 2 const float iScalar = 1.0 / scalar;
109 2 __m256 invScalar = _mm256_set1_ps(iScalar);
110 2 int32_t* inputPtr = (int32_t*)inputVector;
111 __m256i inputVal;
112 __m256 ret;
113
114
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < oneEightPoints; number++) {
115 // Load the 8 values
116 32766 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
117
118 32766 ret = _mm256_cvtepi32_ps(inputVal);
119 32766 ret = _mm256_mul_ps(ret, invScalar);
120
121 _mm256_storeu_ps(outputVectorPtr, ret);
122
123 32766 outputVectorPtr += 8;
124 32766 inputPtr += 8;
125 }
126
127 2 number = oneEightPoints * 8;
128
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
129 14 outputVector[number] = ((float)(inputVector[number])) * iScalar;
130 }
131 2 }
132 #endif /* LV_HAVE_AVX2 */
133
134
135 #ifdef LV_HAVE_SSE2
136 #include <emmintrin.h>
137
138 2 static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
139 const int32_t* inputVector,
140 const float scalar,
141 unsigned int num_points)
142 {
143 2 unsigned int number = 0;
144 2 const unsigned int quarterPoints = num_points / 4;
145
146 2 float* outputVectorPtr = outputVector;
147 2 const float iScalar = 1.0 / scalar;
148 2 __m128 invScalar = _mm_set_ps1(iScalar);
149 2 int32_t* inputPtr = (int32_t*)inputVector;
150 __m128i inputVal;
151 __m128 ret;
152
153
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
154 // Load the 4 values
155 65534 inputVal = _mm_loadu_si128((__m128i*)inputPtr);
156
157 65534 ret = _mm_cvtepi32_ps(inputVal);
158 65534 ret = _mm_mul_ps(ret, invScalar);
159
160 _mm_storeu_ps(outputVectorPtr, ret);
161
162 65534 outputVectorPtr += 4;
163 65534 inputPtr += 4;
164 }
165
166 2 number = quarterPoints * 4;
167
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
168 6 outputVector[number] = ((float)(inputVector[number])) * iScalar;
169 }
170 2 }
171 #endif /* LV_HAVE_SSE2 */
172
173
174 #ifdef LV_HAVE_GENERIC
175
176 2 static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
177 const int32_t* inputVector,
178 const float scalar,
179 unsigned int num_points)
180 {
181 2 float* outputVectorPtr = outputVector;
182 2 const int32_t* inputVectorPtr = inputVector;
183 2 unsigned int number = 0;
184 2 const float iScalar = 1.0 / scalar;
185
186
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
187 262142 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
188 }
189 2 }
190 #endif /* LV_HAVE_GENERIC */
191
192 #endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
193
194
195 #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196 #define INCLUDED_volk_32i_s32f_convert_32f_a_H
197
198 #include <inttypes.h>
199 #include <stdio.h>
200
201 #ifdef LV_HAVE_AVX512F
202 #include <immintrin.h>
203
204 static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
205 const int32_t* inputVector,
206 const float scalar,
207 unsigned int num_points)
208 {
209 unsigned int number = 0;
210 const unsigned int onesixteenthPoints = num_points / 16;
211
212 float* outputVectorPtr = outputVector;
213 const float iScalar = 1.0 / scalar;
214 __m512 invScalar = _mm512_set1_ps(iScalar);
215 int32_t* inputPtr = (int32_t*)inputVector;
216 __m512i inputVal;
217 __m512 ret;
218
219 for (; number < onesixteenthPoints; number++) {
220 // Load the values
221 inputVal = _mm512_load_si512((__m512i*)inputPtr);
222
223 ret = _mm512_cvtepi32_ps(inputVal);
224 ret = _mm512_mul_ps(ret, invScalar);
225
226 _mm512_store_ps(outputVectorPtr, ret);
227
228 outputVectorPtr += 16;
229 inputPtr += 16;
230 }
231
232 number = onesixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = ((float)(inputVector[number])) * iScalar;
235 }
236 }
237 #endif /* LV_HAVE_AVX512F */
238
239 #ifdef LV_HAVE_AVX2
240 #include <immintrin.h>
241
242 2 static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
243 const int32_t* inputVector,
244 const float scalar,
245 unsigned int num_points)
246 {
247 2 unsigned int number = 0;
248 2 const unsigned int oneEightPoints = num_points / 8;
249
250 2 float* outputVectorPtr = outputVector;
251 2 const float iScalar = 1.0 / scalar;
252 2 __m256 invScalar = _mm256_set1_ps(iScalar);
253 2 int32_t* inputPtr = (int32_t*)inputVector;
254 __m256i inputVal;
255 __m256 ret;
256
257
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < oneEightPoints; number++) {
258 // Load the 8 values
259 32766 inputVal = _mm256_load_si256((__m256i*)inputPtr);
260
261 32766 ret = _mm256_cvtepi32_ps(inputVal);
262 32766 ret = _mm256_mul_ps(ret, invScalar);
263
264 _mm256_store_ps(outputVectorPtr, ret);
265
266 32766 outputVectorPtr += 8;
267 32766 inputPtr += 8;
268 }
269
270 2 number = oneEightPoints * 8;
271
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
272 14 outputVector[number] = ((float)(inputVector[number])) * iScalar;
273 }
274 2 }
275 #endif /* LV_HAVE_AVX2 */
276
277
278 #ifdef LV_HAVE_SSE2
279 #include <emmintrin.h>
280
281 2 static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
282 const int32_t* inputVector,
283 const float scalar,
284 unsigned int num_points)
285 {
286 2 unsigned int number = 0;
287 2 const unsigned int quarterPoints = num_points / 4;
288
289 2 float* outputVectorPtr = outputVector;
290 2 const float iScalar = 1.0 / scalar;
291 2 __m128 invScalar = _mm_set_ps1(iScalar);
292 2 int32_t* inputPtr = (int32_t*)inputVector;
293 __m128i inputVal;
294 __m128 ret;
295
296
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
297 // Load the 4 values
298 65534 inputVal = _mm_load_si128((__m128i*)inputPtr);
299
300 65534 ret = _mm_cvtepi32_ps(inputVal);
301 65534 ret = _mm_mul_ps(ret, invScalar);
302
303 _mm_store_ps(outputVectorPtr, ret);
304
305 65534 outputVectorPtr += 4;
306 65534 inputPtr += 4;
307 }
308
309 2 number = quarterPoints * 4;
310
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
311 6 outputVector[number] = ((float)(inputVector[number])) * iScalar;
312 }
313 2 }
314 #endif /* LV_HAVE_SSE2 */
315
316
317 #ifdef LV_HAVE_GENERIC
318
319 2 static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
320 const int32_t* inputVector,
321 const float scalar,
322 unsigned int num_points)
323 {
324 2 float* outputVectorPtr = outputVector;
325 2 const int32_t* inputVectorPtr = inputVector;
326 2 unsigned int number = 0;
327 2 const float iScalar = 1.0 / scalar;
328
329
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
330 262142 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
331 }
332 2 }
333 #endif /* LV_HAVE_GENERIC */
334
335
336 #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
337