Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32i_s32f_convert_32f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Converts the samples in the inputVector from 32-bit integers into | ||
16 | * floating point values and then divides them by the input scalar. | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_32i_s32f_convert_32f(float* outputVector, const int32_t* inputVector, const | ||
21 | * float scalar, unsigned int num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li inputVector: The vector of 32-bit integers. | ||
25 | * \li scalar: The value that the output is divided by after being converted to a float. | ||
26 | * \li num_points: The number of values. | ||
27 | * | ||
28 | * \b Outputs | ||
29 | * \li outputVector: The output vector of floats. | ||
30 | * | ||
31 | * \b Example | ||
32 | * Convert the integers 0..N-1 to floats in the range [0,1) by dividing by N. | ||
33 | * \code | ||
34 | * int N = 1<<8; | ||
35 | * unsigned int alignment = volk_get_alignment(); | ||
36 | * | ||
37 | * int32_t* x = (int32_t*)volk_malloc(N*sizeof(int32_t), alignment); | ||
38 | * float* z = (float*)volk_malloc(N*sizeof(float), alignment); | ||
39 | * float scale = (float)N; | ||
40 | * for(unsigned int ii=0; ii<N; ++ii){ | ||
41 | * x[ii] = ii; | ||
42 | * } | ||
43 | * | ||
44 | * volk_32i_s32f_convert_32f(z, x, scale, N); | ||
45 | * | ||
46 | * volk_free(x); | ||
47 | * volk_free(z); | ||
48 | * \endcode | ||
49 | */ | ||
50 | |||
51 | #ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H | ||
52 | #define INCLUDED_volk_32i_s32f_convert_32f_u_H | ||
53 | |||
54 | #include <inttypes.h> | ||
55 | #include <stdio.h> | ||
56 | |||
57 | #ifdef LV_HAVE_AVX512F | ||
58 | #include <immintrin.h> | ||
59 | |||
/*!
 * Unaligned AVX-512F kernel: converts each 32-bit integer of inputVector to a
 * float and multiplies it by 1/scalar, writing num_points results to outputVector.
 * Unaligned load/store intrinsics are used, so neither buffer needs special
 * alignment.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points 32-bit integers (never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int onesixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < onesixteenthPoints; number++) {
        // Load 16 integers, convert them to float and scale them.
        const __m512i inputVal = _mm512_loadu_si512((const __m512i*)inputPtr);
        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    // Scalar tail for the remaining num_points % 16 samples.
    for (number = onesixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
93 | #endif /* LV_HAVE_AVX512F */ | ||
94 | |||
95 | |||
96 | #ifdef LV_HAVE_AVX2 | ||
97 | #include <immintrin.h> | ||
98 | |||
/*!
 * Unaligned AVX2 kernel: converts each 32-bit integer of inputVector to a float
 * and multiplies it by 1/scalar, writing num_points results to outputVector.
 * Unaligned load/store intrinsics are used, so neither buffer needs special
 * alignment.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points 32-bit integers (never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int oneEightPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < oneEightPoints; number++) {
        // Load 8 integers, convert them to float and scale them.
        const __m256i inputVal = _mm256_loadu_si256((const __m256i*)inputPtr);
        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    // Scalar tail for the remaining num_points % 8 samples.
    for (number = oneEightPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
132 | #endif /* LV_HAVE_AVX2 */ | ||
133 | |||
134 | |||
135 | #ifdef LV_HAVE_SSE2 | ||
136 | #include <emmintrin.h> | ||
137 | |||
/*!
 * Unaligned SSE2 kernel: converts each 32-bit integer of inputVector to a float
 * and multiplies it by 1/scalar, writing num_points results to outputVector.
 * Unaligned load/store intrinsics are used, so neither buffer needs special
 * alignment.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points 32-bit integers (never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        // Load 4 integers, convert them to float and scale them.
        const __m128i inputVal = _mm_loadu_si128((const __m128i*)inputPtr);
        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_storeu_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    // Scalar tail for the remaining num_points % 4 samples.
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
171 | #endif /* LV_HAVE_SSE2 */ | ||
172 | |||
173 | |||
174 | #ifdef LV_HAVE_GENERIC | ||
175 | |||
/*!
 * Portable kernel: converts each 32-bit integer of inputVector to a float and
 * multiplies it by 1/scalar, writing num_points results to outputVector.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points 32-bit integers
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    // One division up front; every sample is then scaled by the reciprocal.
    const float reciprocal = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = ((float)(inputVector[idx])) * reciprocal;
    }
}
190 | #endif /* LV_HAVE_GENERIC */ | ||
191 | |||
192 | #endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */ | ||
193 | |||
194 | |||
195 | #ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H | ||
196 | #define INCLUDED_volk_32i_s32f_convert_32f_a_H | ||
197 | |||
198 | #include <inttypes.h> | ||
199 | #include <stdio.h> | ||
200 | |||
201 | #ifdef LV_HAVE_AVX512F | ||
202 | #include <immintrin.h> | ||
203 | |||
/*!
 * Aligned AVX-512F kernel: converts each 32-bit integer of inputVector to a
 * float and multiplies it by 1/scalar, writing num_points results to outputVector.
 * The vector loop uses the aligned 512-bit load/store intrinsics, so both
 * buffers must be 64-byte aligned.
 *
 * \param outputVector destination for num_points floats (64-byte aligned)
 * \param inputVector  source of num_points 32-bit integers (64-byte aligned,
 *                     never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int onesixteenthPoints = num_points / 16;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m512 invScalar = _mm512_set1_ps(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < onesixteenthPoints; number++) {
        // Load 16 integers, convert them to float and scale them.
        const __m512i inputVal = _mm512_load_si512((const __m512i*)inputPtr);
        __m512 ret = _mm512_cvtepi32_ps(inputVal);
        ret = _mm512_mul_ps(ret, invScalar);

        _mm512_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 16;
        inputPtr += 16;
    }

    // Scalar tail for the remaining num_points % 16 samples.
    for (number = onesixteenthPoints * 16; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
237 | #endif /* LV_HAVE_AVX512F */ | ||
238 | |||
239 | #ifdef LV_HAVE_AVX2 | ||
240 | #include <immintrin.h> | ||
241 | |||
/*!
 * Aligned AVX2 kernel: converts each 32-bit integer of inputVector to a float
 * and multiplies it by 1/scalar, writing num_points results to outputVector.
 * The vector loop uses the aligned 256-bit load/store intrinsics, so both
 * buffers must be 32-byte aligned.
 *
 * \param outputVector destination for num_points floats (32-byte aligned)
 * \param inputVector  source of num_points 32-bit integers (32-byte aligned,
 *                     never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int oneEightPoints = num_points / 8;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m256 invScalar = _mm256_set1_ps(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < oneEightPoints; number++) {
        // Load 8 integers, convert them to float and scale them.
        const __m256i inputVal = _mm256_load_si256((const __m256i*)inputPtr);
        __m256 ret = _mm256_cvtepi32_ps(inputVal);
        ret = _mm256_mul_ps(ret, invScalar);

        _mm256_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 8;
        inputPtr += 8;
    }

    // Scalar tail for the remaining num_points % 8 samples.
    for (number = oneEightPoints * 8; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
275 | #endif /* LV_HAVE_AVX2 */ | ||
276 | |||
277 | |||
278 | #ifdef LV_HAVE_SSE2 | ||
279 | #include <emmintrin.h> | ||
280 | |||
/*!
 * Aligned SSE2 kernel: converts each 32-bit integer of inputVector to a float
 * and multiplies it by 1/scalar, writing num_points results to outputVector.
 * The vector loop uses the aligned 128-bit load/store intrinsics, so both
 * buffers must be 16-byte aligned.
 *
 * \param outputVector destination for num_points floats (16-byte aligned)
 * \param inputVector  source of num_points 32-bit integers (16-byte aligned,
 *                     never written)
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* outputVectorPtr = outputVector;
    // Multiply by the reciprocal so the loops need no per-sample division.
    const float iScalar = 1.0 / scalar;
    const __m128 invScalar = _mm_set_ps1(iScalar);
    // The load takes a pointer-to-const, so the input keeps its const qualifier.
    const int32_t* inputPtr = inputVector;

    for (; number < quarterPoints; number++) {
        // Load 4 integers, convert them to float and scale them.
        const __m128i inputVal = _mm_load_si128((const __m128i*)inputPtr);
        __m128 ret = _mm_cvtepi32_ps(inputVal);
        ret = _mm_mul_ps(ret, invScalar);

        _mm_store_ps(outputVectorPtr, ret);

        outputVectorPtr += 4;
        inputPtr += 4;
    }

    // Scalar tail for the remaining num_points % 4 samples.
    for (number = quarterPoints * 4; number < num_points; number++) {
        outputVector[number] = ((float)(inputVector[number])) * iScalar;
    }
}
314 | #endif /* LV_HAVE_SSE2 */ | ||
315 | |||
316 | |||
317 | #ifdef LV_HAVE_GENERIC | ||
318 | |||
/*!
 * Portable kernel (aligned-protocol variant): converts each 32-bit integer of
 * inputVector to a float and multiplies it by 1/scalar, writing num_points
 * results to outputVector. Only plain scalar accesses are used.
 *
 * \param outputVector destination for num_points floats
 * \param inputVector  source of num_points 32-bit integers
 * \param scalar       value every converted sample is divided by
 * \param num_points   number of samples to convert
 */
static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    // One division up front; every sample is then scaled by the reciprocal.
    const float reciprocal = 1.0 / scalar;
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = ((float)(inputVector[idx])) * reciprocal;
    }
}
333 | #endif /* LV_HAVE_GENERIC */ | ||
334 | |||
335 | |||
336 | #endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */ | ||
337 |