Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_64f_convert_32f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Converts doubles into floats. | ||
16 | * | ||
17 | * <b>Dispatcher Prototype</b> | ||
18 | * \code | ||
19 | * void volk_64f_convert_32f(float* outputVector, const double* inputVector, unsigned int | ||
20 | * num_points) \endcode | ||
21 | * | ||
22 | * \b Inputs | ||
23 | * \li inputVector: The vector of doubles to convert to floats. | ||
24 | * \li num_points: The number of data points. | ||
25 | * | ||
26 | * \b Outputs | ||
27 | * \li outputVector: returns the converted floats. | ||
28 | * | ||
29 | * \b Example | ||
30 | * \code | ||
31 | * int N = 10; | ||
32 | * unsigned int alignment = volk_get_alignment(); | ||
33 | * double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment); | ||
34 | * float* out = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
35 | * | ||
36 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
37 | * increasing[ii] = (double)ii; | ||
38 | * } | ||
39 | * | ||
40 | * volk_64f_convert_32f(out, increasing, N); | ||
41 | * | ||
42 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
43 | * printf("out[%u] = %1.2f\n", ii, out[ii]); | ||
44 | * } | ||
45 | * | ||
46 | * volk_free(increasing); | ||
47 | * volk_free(out); | ||
48 | * \endcode | ||
49 | */ | ||
50 | |||
51 | #ifndef INCLUDED_volk_64f_convert_32f_u_H | ||
52 | #define INCLUDED_volk_64f_convert_32f_u_H | ||
53 | |||
54 | #include <inttypes.h> | ||
55 | #include <stdio.h> | ||
56 | |||
57 | #ifdef LV_HAVE_AVX512F | ||
58 | #include <immintrin.h> | ||
59 | |||
60 | ✗ | static inline void volk_64f_convert_32f_u_avx512f(float* outputVector, | |
61 | const double* inputVector, | ||
62 | unsigned int num_points) | ||
63 | { | ||
64 | ✗ | unsigned int number = 0; | |
65 | |||
66 | ✗ | const unsigned int oneSixteenthPoints = num_points / 16; | |
67 | |||
68 | ✗ | const double* inputVectorPtr = (const double*)inputVector; | |
69 | ✗ | float* outputVectorPtr = outputVector; | |
70 | __m256 ret1, ret2; | ||
71 | __m512d inputVal1, inputVal2; | ||
72 | |||
73 | ✗ | for (; number < oneSixteenthPoints; number++) { | |
74 | ✗ | inputVal1 = _mm512_loadu_pd(inputVectorPtr); | |
75 | ✗ | inputVectorPtr += 8; | |
76 | ✗ | inputVal2 = _mm512_loadu_pd(inputVectorPtr); | |
77 | ✗ | inputVectorPtr += 8; | |
78 | |||
79 | ✗ | ret1 = _mm512_cvtpd_ps(inputVal1); | |
80 | ✗ | ret2 = _mm512_cvtpd_ps(inputVal2); | |
81 | |||
82 | _mm256_storeu_ps(outputVectorPtr, ret1); | ||
83 | ✗ | outputVectorPtr += 8; | |
84 | |||
85 | _mm256_storeu_ps(outputVectorPtr, ret2); | ||
86 | ✗ | outputVectorPtr += 8; | |
87 | } | ||
88 | |||
89 | ✗ | number = oneSixteenthPoints * 16; | |
90 | ✗ | for (; number < num_points; number++) { | |
91 | ✗ | outputVector[number] = (float)(inputVector[number]); | |
92 | } | ||
93 | ✗ | } | |
94 | #endif /* LV_HAVE_AVX512F */ | ||
95 | |||
96 | |||
97 | #ifdef LV_HAVE_AVX | ||
98 | #include <immintrin.h> | ||
99 | |||
100 | 2 | static inline void volk_64f_convert_32f_u_avx(float* outputVector, | |
101 | const double* inputVector, | ||
102 | unsigned int num_points) | ||
103 | { | ||
104 | 2 | unsigned int number = 0; | |
105 | |||
106 | 2 | const unsigned int oneEightPoints = num_points / 8; | |
107 | |||
108 | 2 | const double* inputVectorPtr = (const double*)inputVector; | |
109 | 2 | float* outputVectorPtr = outputVector; | |
110 | __m128 ret1, ret2; | ||
111 | __m256d inputVal1, inputVal2; | ||
112 | |||
113 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < oneEightPoints; number++) { |
114 | 32766 | inputVal1 = _mm256_loadu_pd(inputVectorPtr); | |
115 | 32766 | inputVectorPtr += 4; | |
116 | 32766 | inputVal2 = _mm256_loadu_pd(inputVectorPtr); | |
117 | 32766 | inputVectorPtr += 4; | |
118 | |||
119 | 32766 | ret1 = _mm256_cvtpd_ps(inputVal1); | |
120 | 32766 | ret2 = _mm256_cvtpd_ps(inputVal2); | |
121 | |||
122 | _mm_storeu_ps(outputVectorPtr, ret1); | ||
123 | 32766 | outputVectorPtr += 4; | |
124 | |||
125 | _mm_storeu_ps(outputVectorPtr, ret2); | ||
126 | 32766 | outputVectorPtr += 4; | |
127 | } | ||
128 | |||
129 | 2 | number = oneEightPoints * 8; | |
130 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
131 | 14 | outputVector[number] = (float)(inputVector[number]); | |
132 | } | ||
133 | 2 | } | |
134 | #endif /* LV_HAVE_AVX */ | ||
135 | |||
136 | |||
137 | #ifdef LV_HAVE_SSE2 | ||
138 | #include <emmintrin.h> | ||
139 | |||
/*!
 * \brief Narrows doubles to floats, 4 points per iteration, with SSE2.
 *
 * Uses the unaligned load/store intrinsics for the vectorized part; any
 * points left over after the last full group of 4 are converted one by one.
 *
 * \param outputVector destination for the converted floats (num_points entries)
 * \param inputVector  doubles to convert (num_points entries)
 * \param num_points   number of points to convert
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    const unsigned int blockCount = num_points / 4;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < blockCount; ++block) {
        const __m128d firstPair = _mm_loadu_pd(src);
        const __m128d secondPair = _mm_loadu_pd(src + 2);

        /* Each conversion leaves its two floats in the low half. */
        const __m128 lowHalf = _mm_cvtpd_ps(firstPair);
        const __m128 highHalf = _mm_cvtpd_ps(secondPair);

        /* Pack the two low halves into one register of four floats. */
        _mm_storeu_ps(dst, _mm_movelh_ps(lowHalf, highHalf));

        src += 4;
        dst += 4;
    }

    /* Scalar tail: whatever did not fill a complete group of 4. */
    for (idx = blockCount * 4; idx < num_points; ++idx) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
173 | #endif /* LV_HAVE_SSE2 */ | ||
174 | |||
175 | |||
176 | #ifdef LV_HAVE_GENERIC | ||
177 | |||
/*!
 * \brief Portable conversion of doubles to floats, one point at a time.
 *
 * \param outputVector destination for the converted floats (num_points entries)
 * \param inputVector  doubles to convert (num_points entries)
 * \param num_points   number of points to convert
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; ++idx) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
190 | #endif /* LV_HAVE_GENERIC */ | ||
191 | |||
192 | |||
193 | #endif /* INCLUDED_volk_64f_convert_32f_u_H */ | ||
194 | #ifndef INCLUDED_volk_64f_convert_32f_a_H | ||
195 | #define INCLUDED_volk_64f_convert_32f_a_H | ||
196 | |||
197 | #include <inttypes.h> | ||
198 | #include <stdio.h> | ||
199 | |||
200 | #ifdef LV_HAVE_AVX512F | ||
201 | #include <immintrin.h> | ||
202 | |||
203 | ✗ | static inline void volk_64f_convert_32f_a_avx512f(float* outputVector, | |
204 | const double* inputVector, | ||
205 | unsigned int num_points) | ||
206 | { | ||
207 | ✗ | unsigned int number = 0; | |
208 | |||
209 | ✗ | const unsigned int oneSixteenthPoints = num_points / 16; | |
210 | |||
211 | ✗ | const double* inputVectorPtr = (const double*)inputVector; | |
212 | ✗ | float* outputVectorPtr = outputVector; | |
213 | __m256 ret1, ret2; | ||
214 | __m512d inputVal1, inputVal2; | ||
215 | |||
216 | ✗ | for (; number < oneSixteenthPoints; number++) { | |
217 | ✗ | inputVal1 = _mm512_load_pd(inputVectorPtr); | |
218 | ✗ | inputVectorPtr += 8; | |
219 | ✗ | inputVal2 = _mm512_load_pd(inputVectorPtr); | |
220 | ✗ | inputVectorPtr += 8; | |
221 | |||
222 | ✗ | ret1 = _mm512_cvtpd_ps(inputVal1); | |
223 | ✗ | ret2 = _mm512_cvtpd_ps(inputVal2); | |
224 | |||
225 | _mm256_store_ps(outputVectorPtr, ret1); | ||
226 | ✗ | outputVectorPtr += 8; | |
227 | |||
228 | _mm256_store_ps(outputVectorPtr, ret2); | ||
229 | ✗ | outputVectorPtr += 8; | |
230 | } | ||
231 | |||
232 | ✗ | number = oneSixteenthPoints * 16; | |
233 | ✗ | for (; number < num_points; number++) { | |
234 | ✗ | outputVector[number] = (float)(inputVector[number]); | |
235 | } | ||
236 | ✗ | } | |
237 | #endif /* LV_HAVE_AVX512F */ | ||
238 | |||
239 | |||
240 | #ifdef LV_HAVE_AVX | ||
241 | #include <immintrin.h> | ||
242 | |||
243 | 2 | static inline void volk_64f_convert_32f_a_avx(float* outputVector, | |
244 | const double* inputVector, | ||
245 | unsigned int num_points) | ||
246 | { | ||
247 | 2 | unsigned int number = 0; | |
248 | |||
249 | 2 | const unsigned int oneEightPoints = num_points / 8; | |
250 | |||
251 | 2 | const double* inputVectorPtr = (const double*)inputVector; | |
252 | 2 | float* outputVectorPtr = outputVector; | |
253 | __m128 ret1, ret2; | ||
254 | __m256d inputVal1, inputVal2; | ||
255 | |||
256 |
2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
|
32768 | for (; number < oneEightPoints; number++) { |
257 | 32766 | inputVal1 = _mm256_load_pd(inputVectorPtr); | |
258 | 32766 | inputVectorPtr += 4; | |
259 | 32766 | inputVal2 = _mm256_load_pd(inputVectorPtr); | |
260 | 32766 | inputVectorPtr += 4; | |
261 | |||
262 | 32766 | ret1 = _mm256_cvtpd_ps(inputVal1); | |
263 | 32766 | ret2 = _mm256_cvtpd_ps(inputVal2); | |
264 | |||
265 | _mm_store_ps(outputVectorPtr, ret1); | ||
266 | 32766 | outputVectorPtr += 4; | |
267 | |||
268 | _mm_store_ps(outputVectorPtr, ret2); | ||
269 | 32766 | outputVectorPtr += 4; | |
270 | } | ||
271 | |||
272 | 2 | number = oneEightPoints * 8; | |
273 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (; number < num_points; number++) { |
274 | 14 | outputVector[number] = (float)(inputVector[number]); | |
275 | } | ||
276 | 2 | } | |
277 | #endif /* LV_HAVE_AVX */ | ||
278 | |||
279 | |||
280 | #ifdef LV_HAVE_SSE2 | ||
281 | #include <emmintrin.h> | ||
282 | |||
/*!
 * \brief Narrows doubles to floats, 4 points per iteration, with SSE2 using
 *        aligned memory accesses.
 *
 * The vectorized part uses the aligned load/store intrinsics
 * (_mm_load_pd / _mm_store_ps), so both buffers must satisfy the alignment
 * those intrinsics require. Points left over after the last full group of 4
 * are converted one by one.
 *
 * \param outputVector destination for the converted floats (num_points entries)
 * \param inputVector  doubles to convert (num_points entries)
 * \param num_points   number of points to convert
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    const unsigned int blockCount = num_points / 4;
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int block;
    unsigned int idx;

    for (block = 0; block < blockCount; ++block) {
        const __m128d firstPair = _mm_load_pd(src);
        const __m128d secondPair = _mm_load_pd(src + 2);

        /* Each conversion leaves its two floats in the low half. */
        const __m128 lowHalf = _mm_cvtpd_ps(firstPair);
        const __m128 highHalf = _mm_cvtpd_ps(secondPair);

        /* Pack the two low halves into one register of four floats. */
        _mm_store_ps(dst, _mm_movelh_ps(lowHalf, highHalf));

        src += 4;
        dst += 4;
    }

    /* Scalar tail: whatever did not fill a complete group of 4. */
    for (idx = blockCount * 4; idx < num_points; ++idx) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
316 | #endif /* LV_HAVE_SSE2 */ | ||
317 | |||
318 | |||
319 | #ifdef LV_HAVE_GENERIC | ||
320 | |||
/*!
 * \brief Portable conversion of doubles to floats, one point at a time
 *        (entry registered under the aligned kernel name).
 *
 * The body performs plain scalar accesses only.
 *
 * \param outputVector destination for the converted floats (num_points entries)
 * \param inputVector  doubles to convert (num_points entries)
 * \param num_points   number of points to convert
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    const double* src = inputVector;
    float* dst = outputVector;
    unsigned int remaining = num_points;

    while (remaining > 0) {
        *dst = (float)(*src);
        ++dst;
        ++src;
        --remaining;
    }
}
333 | #endif /* LV_HAVE_GENERIC */ | ||
334 | |||
335 | |||
336 | #endif /* INCLUDED_volk_64f_convert_32f_a_H */ | ||
337 |