GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_s32f_convert_32i.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 157 171 91.8%
Functions: 8 8 100.0%
Branches: 40 54 74.1%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_s32f_convert_32i
12 *
13 * \b Overview
14 *
15 * Converts each floating point number in the input vector to a 32-bit integer
16 * after multiplying it by a scaling factor.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32f_s32f_convert_32i(int32_t* outputVector, const float* inputVector, const
21 * float scalar, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li inputVector: the input vector of floats.
25 * \li scalar: The value multiplied against each point in the input buffer.
26 * \li num_points: The number of data points.
27 *
28 * \b Outputs
29 * \li outputVector: The output vector.
30 *
31 * \b Example
32 * Convert floats from [-1,1] to integers, using a scale of 5 so that the smallest
 * input delta (0.2) maps to one integer step
33 * \code
34 * int N = 10;
35 * unsigned int alignment = volk_get_alignment();
36 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
37 * int32_t* out = (int32_t*)volk_malloc(sizeof(int32_t)*N, alignment);
38 *
39 * for(unsigned int ii = 0; ii < N; ++ii){
40 * increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
41 * }
42 *
43 * // Normalize by the smallest delta (0.2 in this example)
44 * float scale = 5.f;
45 *
46 * volk_32f_s32f_convert_32i(out, increasing, scale, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out[%u] = %i\n", ii, out[ii]);
50 * }
51 *
52 * volk_free(increasing);
53 * volk_free(out);
54 * \endcode
55 */
56
57 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
58 #define INCLUDED_volk_32f_s32f_convert_32i_u_H
59
60 #include <inttypes.h>
61 #include <limits.h>
62 #include <stdio.h>
63
64 #ifdef LV_HAVE_AVX
65 #include <immintrin.h>
66
/*!
 * Unaligned AVX kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Full groups of 8 points are handled by the vector loop using unaligned
 * loads and stores; the remaining num_points % 8 points are handled by the
 * scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
67 2 static inline void volk_32f_s32f_convert_32i_u_avx(int32_t* outputVector,
68 const float* inputVector,
69 const float scalar,
70 unsigned int num_points)
71 {
72 2 unsigned int number = 0;
73
74 2 const unsigned int eighthPoints = num_points / 8;
75
76 2 const float* inputVectorPtr = (const float*)inputVector;
77 2 int32_t* outputVectorPtr = outputVector;
78
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
79 2 float min_val = INT_MIN;
80 2 float max_val = (uint32_t)INT_MAX + 1;
81 float r;
82
83 2 __m256 vScalar = _mm256_set1_ps(scalar);
84 __m256 inputVal1;
85 __m256i intInputVal1;
86 2 __m256 vmin_val = _mm256_set1_ps(min_val);
87 2 __m256 vmax_val = _mm256_set1_ps(max_val);
88
89
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
90 32766 inputVal1 = _mm256_loadu_ps(inputVectorPtr);
91 32766 inputVectorPtr += 8;
92
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
93 98298 inputVal1 = _mm256_max_ps(
94 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* NOTE(review): lanes saturated at max_val hold INT_MAX + 1 when converted;
 * confirm the converted result matches the generic kernel's INT_MAX. */
95 32766 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
96
97 _mm256_storeu_si256((__m256i*)outputVectorPtr, intInputVal1);
98 32766 outputVectorPtr += 8;
99 }
100
/* Resume at the first point not covered by the vector loop. */
101 2 number = eighthPoints * 8;
102
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
103 14 r = inputVector[number] * scalar;
104
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (r > max_val)
105 r = max_val;
106
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 else if (r < min_val)
107 r = min_val;
/* NOTE(review): when r was clamped to max_val (INT_MAX + 1) the value cast
 * here is outside the int32_t range; the generic kernel instead stores
 * INT_MAX for r >= max_val. Neither clamp branch was taken in the recorded
 * run, so this path is untested. */
108 14 outputVector[number] = (int32_t)rintf(r);
109 }
110 2 }
111
112 #endif /* LV_HAVE_AVX */
113
114 #ifdef LV_HAVE_SSE2
115 #include <emmintrin.h>
116
/*!
 * Unaligned SSE2 kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Full groups of 4 points are handled by the vector loop using unaligned
 * loads and stores; the remaining num_points % 4 points are handled by the
 * scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
117 2 static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
118 const float* inputVector,
119 const float scalar,
120 unsigned int num_points)
121 {
122 2 unsigned int number = 0;
123
124 2 const unsigned int quarterPoints = num_points / 4;
125
126 2 const float* inputVectorPtr = (const float*)inputVector;
127 2 int32_t* outputVectorPtr = outputVector;
128
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
129 2 float min_val = INT_MIN;
130 2 float max_val = (uint32_t)INT_MAX + 1;
131 float r;
132
133 2 __m128 vScalar = _mm_set_ps1(scalar);
134 __m128 inputVal1;
135 __m128i intInputVal1;
136 2 __m128 vmin_val = _mm_set_ps1(min_val);
137 2 __m128 vmax_val = _mm_set_ps1(max_val);
138
139
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
140 65534 inputVal1 = _mm_loadu_ps(inputVectorPtr);
141 65534 inputVectorPtr += 4;
142
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
143 inputVal1 =
144 196602 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* NOTE(review): lanes saturated at max_val hold INT_MAX + 1 when converted;
 * confirm the converted result matches the generic kernel's INT_MAX. */
145 65534 intInputVal1 = _mm_cvtps_epi32(inputVal1);
146
147 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
148 65534 outputVectorPtr += 4;
149 }
150
/* Resume at the first point not covered by the vector loop. */
151 2 number = quarterPoints * 4;
152
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
153 6 r = inputVector[number] * scalar;
154
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (r > max_val)
155 r = max_val;
156
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 else if (r < min_val)
157 r = min_val;
/* NOTE(review): when r was clamped to max_val (INT_MAX + 1) the value cast
 * here is outside the int32_t range; the generic kernel instead stores
 * INT_MAX for r >= max_val. Neither clamp branch was taken in the recorded
 * run, so this path is untested. */
158 6 outputVector[number] = (int32_t)rintf(r);
159 }
160 2 }
161
162 #endif /* LV_HAVE_SSE2 */
163
164
165 #ifdef LV_HAVE_SSE
166 #include <xmmintrin.h>
167
/*!
 * Unaligned SSE kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Scaling and clamping of full groups of 4 points are done with SSE
 * instructions; the clamped floats are then spilled to a local buffer and
 * converted one lane at a time with rintf. The remaining num_points % 4
 * points are handled entirely by the scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
168 2 static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
169 const float* inputVector,
170 const float scalar,
171 unsigned int num_points)
172 {
173 2 unsigned int number = 0;
174
175 2 const unsigned int quarterPoints = num_points / 4;
176
177 2 const float* inputVectorPtr = (const float*)inputVector;
178 2 int32_t* outputVectorPtr = outputVector;
179
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
180 2 float min_val = INT_MIN;
181 2 float max_val = (uint32_t)INT_MAX + 1;
182 float r;
183
184 2 __m128 vScalar = _mm_set_ps1(scalar);
185 __m128 ret;
186 2 __m128 vmin_val = _mm_set_ps1(min_val);
187 2 __m128 vmax_val = _mm_set_ps1(max_val);
188
/* 16-byte aligned scratch buffer so the aligned store below is valid. */
189 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
190
191
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
192 65534 ret = _mm_loadu_ps(inputVectorPtr);
193 65534 inputVectorPtr += 4;
194
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
195 196602 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
196
/* NOTE(review): a lane saturated at max_val (INT_MAX + 1) is outside the
 * int32_t range when cast below; the generic kernel stores INT_MAX for
 * that case instead. */
197 _mm_store_ps(outputFloatBuffer, ret);
198 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
199 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
200 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
201 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
202 }
203
/* Resume at the first point not covered by the vector loop. */
204 2 number = quarterPoints * 4;
205
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
206 6 r = inputVector[number] * scalar;
207
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (r > max_val)
208 r = max_val;
209
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 else if (r < min_val)
210 r = min_val;
/* NOTE(review): same out-of-range cast as in the vector loop when r was
 * clamped to max_val. Neither clamp branch was taken in the recorded run,
 * so this path is untested. */
211 6 outputVector[number] = (int32_t)rintf(r);
212 }
213 2 }
214
215 #endif /* LV_HAVE_SSE */
216
217
218 #ifdef LV_HAVE_GENERIC
219
/*!
 * Portable scalar kernel: multiplies every input float by scalar and stores
 * the result as int32_t, saturating instead of overflowing.
 *
 * Products greater than or equal to INT_MAX + 1 are stored as INT_MAX,
 * products below INT_MIN are stored as INT_MIN, and everything else is
 * rounded with rintf before the cast.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
220 4 static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector,
221 const float* inputVector,
222 const float scalar,
223 unsigned int num_points)
224 {
225 4 int32_t* outputVectorPtr = outputVector;
226 4 const float* inputVectorPtr = inputVector;
/* Saturation thresholds as floats; the upper one is INT_MAX + 1, computed in
 * unsigned arithmetic so the addition itself cannot overflow. */
227 4 const float min_val = (float)INT_MIN;
228 4 const float max_val = (float)((uint32_t)INT_MAX + 1);
229
230
2/2
✓ Branch 0 taken 524284 times.
✓ Branch 1 taken 4 times.
524288 for (unsigned int number = 0; number < num_points; number++) {
231 524284 const float r = *inputVectorPtr++ * scalar;
232 int s;
233
/* The upper test uses >= so that a product of exactly INT_MAX + 1 never
 * reaches the cast. Neither saturation branch was taken in the recorded run.
 * NOTE(review): a NaN product presumably fails both comparisons and falls
 * through to the rintf cast; confirm whether that case needs handling. */
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 524284 times.
524284 if (r >= max_val)
234 s = INT_MAX;
235
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 524284 times.
524284 else if (r < min_val)
236 s = INT_MIN;
237 else
238 524284 s = (int32_t)rintf(r);
239 524284 *outputVectorPtr++ = s;
240 }
241 4 }
242
243 #endif /* LV_HAVE_GENERIC */
244
245
246 #endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
247 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
248 #define INCLUDED_volk_32f_s32f_convert_32i_a_H
249
250 #include <inttypes.h>
251 #include <stdio.h>
252 #include <volk/volk_common.h>
253
254 #ifdef LV_HAVE_AVX
255 #include <immintrin.h>
256
/*!
 * Aligned AVX kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Full groups of 8 points are handled by the vector loop, which uses the
 * aligned load/store intrinsics; both buffers are presumably required to be
 * suitably aligned by the caller (TODO confirm). The remaining
 * num_points % 8 points are handled by the scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
257 2 static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector,
258 const float* inputVector,
259 const float scalar,
260 unsigned int num_points)
261 {
262 2 unsigned int number = 0;
263
264 2 const unsigned int eighthPoints = num_points / 8;
265
266 2 const float* inputVectorPtr = (const float*)inputVector;
267 2 int32_t* outputVectorPtr = outputVector;
268
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
269 2 float min_val = INT_MIN;
270 2 float max_val = (uint32_t)INT_MAX + 1;
271 float r;
272
273 2 __m256 vScalar = _mm256_set1_ps(scalar);
274 __m256 inputVal1;
275 __m256i intInputVal1;
276 2 __m256 vmin_val = _mm256_set1_ps(min_val);
277 2 __m256 vmax_val = _mm256_set1_ps(max_val);
278
279
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
280 32766 inputVal1 = _mm256_load_ps(inputVectorPtr);
281 32766 inputVectorPtr += 8;
282
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
283 98298 inputVal1 = _mm256_max_ps(
284 _mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* NOTE(review): lanes saturated at max_val hold INT_MAX + 1 when converted;
 * confirm the converted result matches the generic kernel's INT_MAX. */
285 32766 intInputVal1 = _mm256_cvtps_epi32(inputVal1);
286
287 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
288 32766 outputVectorPtr += 8;
289 }
290
/* Resume at the first point not covered by the vector loop. */
291 2 number = eighthPoints * 8;
292
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
293 14 r = inputVector[number] * scalar;
294
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 if (r > max_val)
295 r = max_val;
296
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 14 times.
14 else if (r < min_val)
297 r = min_val;
/* NOTE(review): when r was clamped to max_val (INT_MAX + 1) the value cast
 * here is outside the int32_t range; the generic kernel instead stores
 * INT_MAX for r >= max_val. Neither clamp branch was taken in the recorded
 * run, so this path is untested. */
298 14 outputVector[number] = (int32_t)rintf(r);
299 }
300 2 }
301
302 #endif /* LV_HAVE_AVX */
303
304
305 #ifdef LV_HAVE_SSE2
306 #include <emmintrin.h>
307
/*!
 * Aligned SSE2 kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Full groups of 4 points are handled by the vector loop, which uses the
 * aligned load/store intrinsics; both buffers are presumably required to be
 * suitably aligned by the caller (TODO confirm). The remaining
 * num_points % 4 points are handled by the scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
308 2 static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector,
309 const float* inputVector,
310 const float scalar,
311 unsigned int num_points)
312 {
313 2 unsigned int number = 0;
314
315 2 const unsigned int quarterPoints = num_points / 4;
316
317 2 const float* inputVectorPtr = (const float*)inputVector;
318 2 int32_t* outputVectorPtr = outputVector;
319
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
320 2 float min_val = INT_MIN;
321 2 float max_val = (uint32_t)INT_MAX + 1;
322 float r;
323
324 2 __m128 vScalar = _mm_set_ps1(scalar);
325 __m128 inputVal1;
326 __m128i intInputVal1;
327 2 __m128 vmin_val = _mm_set_ps1(min_val);
328 2 __m128 vmax_val = _mm_set_ps1(max_val);
329
330
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
331 65534 inputVal1 = _mm_load_ps(inputVectorPtr);
332 65534 inputVectorPtr += 4;
333
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
334 inputVal1 =
335 196602 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
/* NOTE(review): lanes saturated at max_val hold INT_MAX + 1 when converted;
 * confirm the converted result matches the generic kernel's INT_MAX. */
336 65534 intInputVal1 = _mm_cvtps_epi32(inputVal1);
337
338 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
339 65534 outputVectorPtr += 4;
340 }
341
/* Resume at the first point not covered by the vector loop. */
342 2 number = quarterPoints * 4;
343
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
344 6 r = inputVector[number] * scalar;
345
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (r > max_val)
346 r = max_val;
347
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 else if (r < min_val)
348 r = min_val;
/* NOTE(review): when r was clamped to max_val (INT_MAX + 1) the value cast
 * here is outside the int32_t range; the generic kernel instead stores
 * INT_MAX for r >= max_val. Neither clamp branch was taken in the recorded
 * run, so this path is untested. */
349 6 outputVector[number] = (int32_t)rintf(r);
350 }
351 2 }
352
353 #endif /* LV_HAVE_SSE2 */
354
355
356 #ifdef LV_HAVE_SSE
357 #include <xmmintrin.h>
358
/*!
 * Aligned SSE kernel: multiplies every input float by scalar, clamps the
 * product to [min_val, max_val] and converts it to int32_t.
 *
 * Full groups of 4 points are read with the aligned load intrinsic (the
 * input buffer is presumably required to be suitably aligned; TODO confirm),
 * scaled and clamped with SSE instructions, spilled to a local buffer and
 * converted one lane at a time with rintf. The remaining num_points % 4
 * points are handled entirely by the scalar tail loop.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
359 2 static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
360 const float* inputVector,
361 const float scalar,
362 unsigned int num_points)
363 {
364 2 unsigned int number = 0;
365
366 2 const unsigned int quarterPoints = num_points / 4;
367
368 2 const float* inputVectorPtr = (const float*)inputVector;
369 2 int32_t* outputVectorPtr = outputVector;
370
/* Clamp bounds held as floats: INT_MIN and INT_MAX + 1. The upper bound is
 * computed in unsigned arithmetic so the addition itself cannot overflow. */
371 2 float min_val = INT_MIN;
372 2 float max_val = (uint32_t)INT_MAX + 1;
373 float r;
374
375 2 __m128 vScalar = _mm_set_ps1(scalar);
376 __m128 ret;
377 2 __m128 vmin_val = _mm_set_ps1(min_val);
378 2 __m128 vmax_val = _mm_set_ps1(max_val);
379
/* 16-byte aligned scratch buffer so the aligned store below is valid. */
380 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
381
382
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
383 65534 ret = _mm_load_ps(inputVectorPtr);
384 65534 inputVectorPtr += 4;
385
/* Scale, then clamp: min against the upper bound first, max against the
 * lower bound second. */
386 196602 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
387
/* NOTE(review): a lane saturated at max_val (INT_MAX + 1) is outside the
 * int32_t range when cast below; the generic kernel stores INT_MAX for
 * that case instead. */
388 _mm_store_ps(outputFloatBuffer, ret);
389 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
390 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
391 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
392 65534 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
393 }
394
/* Resume at the first point not covered by the vector loop. */
395 2 number = quarterPoints * 4;
396
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
397 6 r = inputVector[number] * scalar;
398
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 if (r > max_val)
399 r = max_val;
400
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 6 times.
6 else if (r < min_val)
401 r = min_val;
/* NOTE(review): same out-of-range cast as in the vector loop when r was
 * clamped to max_val. Neither clamp branch was taken in the recorded run,
 * so this path is untested. */
402 6 outputVector[number] = (int32_t)rintf(r);
403 }
404 2 }
405
406 #endif /* LV_HAVE_SSE */
407
408
409 #ifdef LV_HAVE_GENERIC
410
/*!
 * Aligned-variant entry point for the portable kernel. It adds no behaviour
 * of its own and simply forwards all arguments to
 * volk_32f_s32f_convert_32i_generic.
 *
 * outputVector: destination for num_points converted values.
 * inputVector:  num_points floats to convert.
 * scalar:       factor applied to each input before conversion.
 * num_points:   number of points to process.
 */
411 2 static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector,
412 const float* inputVector,
413 const float scalar,
414 unsigned int num_points)
415 {
416 2 volk_32f_s32f_convert_32i_generic(outputVector, inputVector, scalar, num_points);
417 2 }
418
419 #endif /* LV_HAVE_GENERIC */
420
421 #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
422