GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_64f_convert_32f.h
Date: 2023-10-23 23:10:04
            Exec   Total   Coverage
Lines:        86     122      70.5%
Functions:     6       8      75.0%
Branches:     20      28      71.4%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_64f_convert_32f
12 *
13 * \b Overview
14 *
15 * Converts doubles into floats.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_64f_convert_32f(float* outputVector, const double* inputVector, unsigned int
20 * num_points) \endcode
21 *
22 * \b Inputs
23 * \li inputVector: The vector of doubles to convert to floats.
24 * \li num_points: The number of data points.
25 *
26 * \b Outputs
27 * \li outputVector: returns the converted floats.
28 *
29 * \b Example
30 * \code
31 * int N = 10;
32 * unsigned int alignment = volk_get_alignment();
33 * double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
34 * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
35 *
36 * for(unsigned int ii = 0; ii < N; ++ii){
37 * increasing[ii] = (double)ii;
38 * }
39 *
40 * volk_64f_convert_32f(out, increasing, N);
41 *
42 * for(unsigned int ii = 0; ii < N; ++ii){
43 * printf("out[%u] = %1.2f\n", ii, out[ii]);
44 * }
45 *
46 * volk_free(increasing);
47 * volk_free(out);
48 * \endcode
49 */
50
51 #ifndef INCLUDED_volk_64f_convert_32f_u_H
52 #define INCLUDED_volk_64f_convert_32f_u_H
53
54 #include <inttypes.h>
55 #include <stdio.h>
56
57 #ifdef LV_HAVE_AVX512F
58 #include <immintrin.h>
59
60 static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
61 const double* inputVector,
62 unsigned int num_points)
63 {
64 unsigned int number = 0;
65
66 const unsigned int oneSixteenthPoints = num_points / 16;
67
68 const double* inputVectorPtr = (const double*)inputVector;
69 float* outputVectorPtr = outputVector;
70 __m256 ret1, ret2;
71 __m512d inputVal1, inputVal2;
72
73 for (; number < oneSixteenthPoints; number++) {
74 inputVal1 = _mm512_loadu_pd(inputVectorPtr);
75 inputVectorPtr += 8;
76 inputVal2 = _mm512_loadu_pd(inputVectorPtr);
77 inputVectorPtr += 8;
78
79 ret1 = _mm512_cvtpd_ps(inputVal1);
80 ret2 = _mm512_cvtpd_ps(inputVal2);
81
82 _mm256_storeu_ps(outputVectorPtr, ret1);
83 outputVectorPtr += 8;
84
85 _mm256_storeu_ps(outputVectorPtr, ret2);
86 outputVectorPtr += 8;
87 }
88
89 number = oneSixteenthPoints * 16;
90 for (; number < num_points; number++) {
91 outputVector[number] = (float)(inputVector[number]);
92 }
93 }
94 #endif /* LV_HAVE_AVX512F */
95
96
97 #ifdef LV_HAVE_AVX
98 #include <immintrin.h>
99
100 2 static inline void volk_64f_convert_32f_u_avx(float* outputVector,
101 const double* inputVector,
102 unsigned int num_points)
103 {
104 2 unsigned int number = 0;
105
106 2 const unsigned int oneEightPoints = num_points / 8;
107
108 2 const double* inputVectorPtr = (const double*)inputVector;
109 2 float* outputVectorPtr = outputVector;
110 __m128 ret1, ret2;
111 __m256d inputVal1, inputVal2;
112
113
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < oneEightPoints; number++) {
114 32766 inputVal1 = _mm256_loadu_pd(inputVectorPtr);
115 32766 inputVectorPtr += 4;
116 32766 inputVal2 = _mm256_loadu_pd(inputVectorPtr);
117 32766 inputVectorPtr += 4;
118
119 32766 ret1 = _mm256_cvtpd_ps(inputVal1);
120 32766 ret2 = _mm256_cvtpd_ps(inputVal2);
121
122 _mm_storeu_ps(outputVectorPtr, ret1);
123 32766 outputVectorPtr += 4;
124
125 _mm_storeu_ps(outputVectorPtr, ret2);
126 32766 outputVectorPtr += 4;
127 }
128
129 2 number = oneEightPoints * 8;
130
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
131 14 outputVector[number] = (float)(inputVector[number]);
132 }
133 2 }
134 #endif /* LV_HAVE_AVX */
135
136
137 #ifdef LV_HAVE_SSE2
138 #include <emmintrin.h>
139
140 2 static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
141 const double* inputVector,
142 unsigned int num_points)
143 {
144 2 unsigned int number = 0;
145
146 2 const unsigned int quarterPoints = num_points / 4;
147
148 2 const double* inputVectorPtr = (const double*)inputVector;
149 2 float* outputVectorPtr = outputVector;
150 __m128 ret, ret2;
151 __m128d inputVal1, inputVal2;
152
153
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
154 65534 inputVal1 = _mm_loadu_pd(inputVectorPtr);
155 65534 inputVectorPtr += 2;
156 65534 inputVal2 = _mm_loadu_pd(inputVectorPtr);
157 65534 inputVectorPtr += 2;
158
159 65534 ret = _mm_cvtpd_ps(inputVal1);
160 65534 ret2 = _mm_cvtpd_ps(inputVal2);
161
162 65534 ret = _mm_movelh_ps(ret, ret2);
163
164 _mm_storeu_ps(outputVectorPtr, ret);
165 65534 outputVectorPtr += 4;
166 }
167
168 2 number = quarterPoints * 4;
169
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
170 6 outputVector[number] = (float)(inputVector[number]);
171 }
172 2 }
173 #endif /* LV_HAVE_SSE2 */
174
175
176 #ifdef LV_HAVE_GENERIC
177
178 2 static inline void volk_64f_convert_32f_generic(float* outputVector,
179 const double* inputVector,
180 unsigned int num_points)
181 {
182 2 float* outputVectorPtr = outputVector;
183 2 const double* inputVectorPtr = inputVector;
184 2 unsigned int number = 0;
185
186
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
187 262142 *outputVectorPtr++ = ((float)(*inputVectorPtr++));
188 }
189 2 }
190 #endif /* LV_HAVE_GENERIC */
191
192
193 #endif /* INCLUDED_volk_64f_convert_32f_u_H */
194 #ifndef INCLUDED_volk_64f_convert_32f_a_H
195 #define INCLUDED_volk_64f_convert_32f_a_H
196
197 #include <inttypes.h>
198 #include <stdio.h>
199
200 #ifdef LV_HAVE_AVX512F
201 #include <immintrin.h>
202
203 static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
204 const double* inputVector,
205 unsigned int num_points)
206 {
207 unsigned int number = 0;
208
209 const unsigned int oneSixteenthPoints = num_points / 16;
210
211 const double* inputVectorPtr = (const double*)inputVector;
212 float* outputVectorPtr = outputVector;
213 __m256 ret1, ret2;
214 __m512d inputVal1, inputVal2;
215
216 for (; number < oneSixteenthPoints; number++) {
217 inputVal1 = _mm512_load_pd(inputVectorPtr);
218 inputVectorPtr += 8;
219 inputVal2 = _mm512_load_pd(inputVectorPtr);
220 inputVectorPtr += 8;
221
222 ret1 = _mm512_cvtpd_ps(inputVal1);
223 ret2 = _mm512_cvtpd_ps(inputVal2);
224
225 _mm256_store_ps(outputVectorPtr, ret1);
226 outputVectorPtr += 8;
227
228 _mm256_store_ps(outputVectorPtr, ret2);
229 outputVectorPtr += 8;
230 }
231
232 number = oneSixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = (float)(inputVector[number]);
235 }
236 }
237 #endif /* LV_HAVE_AVX512F */
238
239
240 #ifdef LV_HAVE_AVX
241 #include <immintrin.h>
242
243 2 static inline void volk_64f_convert_32f_a_avx(float* outputVector,
244 const double* inputVector,
245 unsigned int num_points)
246 {
247 2 unsigned int number = 0;
248
249 2 const unsigned int oneEightPoints = num_points / 8;
250
251 2 const double* inputVectorPtr = (const double*)inputVector;
252 2 float* outputVectorPtr = outputVector;
253 __m128 ret1, ret2;
254 __m256d inputVal1, inputVal2;
255
256
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < oneEightPoints; number++) {
257 32766 inputVal1 = _mm256_load_pd(inputVectorPtr);
258 32766 inputVectorPtr += 4;
259 32766 inputVal2 = _mm256_load_pd(inputVectorPtr);
260 32766 inputVectorPtr += 4;
261
262 32766 ret1 = _mm256_cvtpd_ps(inputVal1);
263 32766 ret2 = _mm256_cvtpd_ps(inputVal2);
264
265 _mm_store_ps(outputVectorPtr, ret1);
266 32766 outputVectorPtr += 4;
267
268 _mm_store_ps(outputVectorPtr, ret2);
269 32766 outputVectorPtr += 4;
270 }
271
272 2 number = oneEightPoints * 8;
273
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
274 14 outputVector[number] = (float)(inputVector[number]);
275 }
276 2 }
277 #endif /* LV_HAVE_AVX */
278
279
280 #ifdef LV_HAVE_SSE2
281 #include <emmintrin.h>
282
283 2 static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
284 const double* inputVector,
285 unsigned int num_points)
286 {
287 2 unsigned int number = 0;
288
289 2 const unsigned int quarterPoints = num_points / 4;
290
291 2 const double* inputVectorPtr = (const double*)inputVector;
292 2 float* outputVectorPtr = outputVector;
293 __m128 ret, ret2;
294 __m128d inputVal1, inputVal2;
295
296
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
297 65534 inputVal1 = _mm_load_pd(inputVectorPtr);
298 65534 inputVectorPtr += 2;
299 65534 inputVal2 = _mm_load_pd(inputVectorPtr);
300 65534 inputVectorPtr += 2;
301
302 65534 ret = _mm_cvtpd_ps(inputVal1);
303 65534 ret2 = _mm_cvtpd_ps(inputVal2);
304
305 65534 ret = _mm_movelh_ps(ret, ret2);
306
307 _mm_store_ps(outputVectorPtr, ret);
308 65534 outputVectorPtr += 4;
309 }
310
311 2 number = quarterPoints * 4;
312
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
313 6 outputVector[number] = (float)(inputVector[number]);
314 }
315 2 }
316 #endif /* LV_HAVE_SSE2 */
317
318
319 #ifdef LV_HAVE_GENERIC
320
321 2 static inline void volk_64f_convert_32f_a_generic(float* outputVector,
322 const double* inputVector,
323 unsigned int num_points)
324 {
325 2 float* outputVectorPtr = outputVector;
326 2 const double* inputVectorPtr = inputVector;
327 2 unsigned int number = 0;
328
329
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
330 262142 *outputVectorPtr++ = ((float)(*inputVectorPtr++));
331 }
332 2 }
333 #endif /* LV_HAVE_GENERIC */
334
335
336 #endif /* INCLUDED_volk_64f_convert_32f_a_H */
337