Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32f_convert_64f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Converts float values into doubles. | ||
16 | * | ||
17 | * <b>Dispatcher Prototype</b> | ||
18 | * \code | ||
19 | * void volk_32f_convert_64f(double* outputVector, const float* inputVector, unsigned int | ||
20 | * num_points) \endcode | ||
21 | * | ||
22 | * \b Inputs | ||
23 | * \li inputVector: The vector of floats to convert to doubles. | ||
24 | * \li num_points: The number of data points. | ||
25 | * | ||
26 | * \b Outputs | ||
27 | * \li outputVector: returns the converted doubles. | ||
28 | * | ||
29 | * \b Example | ||
30 | * Generate floats and convert them to doubles. | ||
31 | * \code | ||
32 | * int N = 10; | ||
33 | * unsigned int alignment = volk_get_alignment(); | ||
34 | * float* in = (float*)volk_malloc(sizeof(float)*N, alignment); | ||
35 | * double* out = (double*)volk_malloc(sizeof(double)*N, alignment); | ||
36 | * | ||
37 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
38 | * in[ii] = (float)ii; | ||
39 | * } | ||
40 | * | ||
41 | * volk_32f_convert_64f(out, in, N); | ||
42 | * | ||
43 | * for(unsigned int ii = 0; ii < N; ++ii){ | ||
44 | * printf("out(%u) = %g\n", ii, out[ii]); | ||
45 | * } | ||
46 | * | ||
47 | * volk_free(in); | ||
48 | * volk_free(out); | ||
49 | * \endcode | ||
50 | */ | ||
51 | |||
52 | |||
53 | #ifndef INCLUDED_volk_32f_convert_64f_u_H | ||
54 | #define INCLUDED_volk_32f_convert_64f_u_H | ||
55 | |||
56 | #include <inttypes.h> | ||
57 | #include <stdio.h> | ||
58 | |||
59 | #ifdef LV_HAVE_AVX | ||
60 | #include <immintrin.h> | ||
61 | |||
62 | 2 | static inline void volk_32f_convert_64f_u_avx(double* outputVector, | |
63 | const float* inputVector, | ||
64 | unsigned int num_points) | ||
65 | { | ||
66 | 2 | unsigned int number = 0; | |
67 | |||
68 | 2 | const unsigned int quarterPoints = num_points / 4; | |
69 | |||
70 | 2 | const float* inputVectorPtr = (const float*)inputVector; | |
71 | 2 | double* outputVectorPtr = outputVector; | |
72 | __m256d ret; | ||
73 | __m128 inputVal; | ||
74 | |||
75 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
76 | 65534 | inputVal = _mm_loadu_ps(inputVectorPtr); | |
77 | 65534 | inputVectorPtr += 4; | |
78 | |||
79 | 65534 | ret = _mm256_cvtps_pd(inputVal); | |
80 | _mm256_storeu_pd(outputVectorPtr, ret); | ||
81 | |||
82 | 65534 | outputVectorPtr += 4; | |
83 | } | ||
84 | |||
85 | 2 | number = quarterPoints * 4; | |
86 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
87 | 6 | outputVector[number] = (double)(inputVector[number]); | |
88 | } | ||
89 | 2 | } | |
90 | |||
91 | #endif /* LV_HAVE_AVX */ | ||
92 | |||
93 | #ifdef LV_HAVE_SSE2 | ||
94 | #include <emmintrin.h> | ||
95 | |||
96 | 2 | static inline void volk_32f_convert_64f_u_sse2(double* outputVector, | |
97 | const float* inputVector, | ||
98 | unsigned int num_points) | ||
99 | { | ||
100 | 2 | unsigned int number = 0; | |
101 | |||
102 | 2 | const unsigned int quarterPoints = num_points / 4; | |
103 | |||
104 | 2 | const float* inputVectorPtr = (const float*)inputVector; | |
105 | 2 | double* outputVectorPtr = outputVector; | |
106 | __m128d ret; | ||
107 | __m128 inputVal; | ||
108 | |||
109 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
110 | 65534 | inputVal = _mm_loadu_ps(inputVectorPtr); | |
111 | 65534 | inputVectorPtr += 4; | |
112 | |||
113 | 65534 | ret = _mm_cvtps_pd(inputVal); | |
114 | |||
115 | _mm_storeu_pd(outputVectorPtr, ret); | ||
116 | 65534 | outputVectorPtr += 2; | |
117 | |||
118 | 65534 | inputVal = _mm_movehl_ps(inputVal, inputVal); | |
119 | |||
120 | 65534 | ret = _mm_cvtps_pd(inputVal); | |
121 | |||
122 | _mm_storeu_pd(outputVectorPtr, ret); | ||
123 | 65534 | outputVectorPtr += 2; | |
124 | } | ||
125 | |||
126 | 2 | number = quarterPoints * 4; | |
127 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
128 | 6 | outputVector[number] = (double)(inputVector[number]); | |
129 | } | ||
130 | 2 | } | |
131 | #endif /* LV_HAVE_SSE2 */ | ||
132 | |||
133 | |||
134 | #ifdef LV_HAVE_GENERIC | ||
135 | |||
136 | 2 | static inline void volk_32f_convert_64f_generic(double* outputVector, | |
137 | const float* inputVector, | ||
138 | unsigned int num_points) | ||
139 | { | ||
140 | 2 | double* outputVectorPtr = outputVector; | |
141 | 2 | const float* inputVectorPtr = inputVector; | |
142 | 2 | unsigned int number = 0; | |
143 | |||
144 | 2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times. | 262144 | for (number = 0; number < num_points; number++) { |
145 | 262142 | *outputVectorPtr++ = ((double)(*inputVectorPtr++)); | |
146 | } | ||
147 | 2 | } | |
148 | #endif /* LV_HAVE_GENERIC */ | ||
149 | |||
150 | |||
151 | #endif /* INCLUDED_volk_32f_convert_64f_u_H */ | ||
152 | |||
153 | |||
154 | #ifndef INCLUDED_volk_32f_convert_64f_a_H | ||
155 | #define INCLUDED_volk_32f_convert_64f_a_H | ||
156 | |||
157 | #include <inttypes.h> | ||
158 | #include <stdio.h> | ||
159 | |||
160 | #ifdef LV_HAVE_AVX | ||
161 | #include <immintrin.h> | ||
162 | |||
163 | 2 | static inline void volk_32f_convert_64f_a_avx(double* outputVector, | |
164 | const float* inputVector, | ||
165 | unsigned int num_points) | ||
166 | { | ||
167 | 2 | unsigned int number = 0; | |
168 | |||
169 | 2 | const unsigned int quarterPoints = num_points / 4; | |
170 | |||
171 | 2 | const float* inputVectorPtr = (const float*)inputVector; | |
172 | 2 | double* outputVectorPtr = outputVector; | |
173 | __m256d ret; | ||
174 | __m128 inputVal; | ||
175 | |||
176 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
177 | 65534 | inputVal = _mm_load_ps(inputVectorPtr); | |
178 | 65534 | inputVectorPtr += 4; | |
179 | |||
180 | 65534 | ret = _mm256_cvtps_pd(inputVal); | |
181 | _mm256_store_pd(outputVectorPtr, ret); | ||
182 | |||
183 | 65534 | outputVectorPtr += 4; | |
184 | } | ||
185 | |||
186 | 2 | number = quarterPoints * 4; | |
187 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
188 | 6 | outputVector[number] = (double)(inputVector[number]); | |
189 | } | ||
190 | 2 | } | |
191 | #endif /* LV_HAVE_AVX */ | ||
192 | |||
193 | #ifdef LV_HAVE_SSE2 | ||
194 | #include <emmintrin.h> | ||
195 | |||
196 | 2 | static inline void volk_32f_convert_64f_a_sse2(double* outputVector, | |
197 | const float* inputVector, | ||
198 | unsigned int num_points) | ||
199 | { | ||
200 | 2 | unsigned int number = 0; | |
201 | |||
202 | 2 | const unsigned int quarterPoints = num_points / 4; | |
203 | |||
204 | 2 | const float* inputVectorPtr = (const float*)inputVector; | |
205 | 2 | double* outputVectorPtr = outputVector; | |
206 | __m128d ret; | ||
207 | __m128 inputVal; | ||
208 | |||
209 | 2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times. | 65536 | for (; number < quarterPoints; number++) { |
210 | 65534 | inputVal = _mm_load_ps(inputVectorPtr); | |
211 | 65534 | inputVectorPtr += 4; | |
212 | |||
213 | 65534 | ret = _mm_cvtps_pd(inputVal); | |
214 | |||
215 | _mm_store_pd(outputVectorPtr, ret); | ||
216 | 65534 | outputVectorPtr += 2; | |
217 | |||
218 | 65534 | inputVal = _mm_movehl_ps(inputVal, inputVal); | |
219 | |||
220 | 65534 | ret = _mm_cvtps_pd(inputVal); | |
221 | |||
222 | _mm_store_pd(outputVectorPtr, ret); | ||
223 | 65534 | outputVectorPtr += 2; | |
224 | } | ||
225 | |||
226 | 2 | number = quarterPoints * 4; | |
227 | 2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times. | 8 | for (; number < num_points; number++) { |
228 | 6 | outputVector[number] = (double)(inputVector[number]); | |
229 | } | ||
230 | 2 | } | |
231 | #endif /* LV_HAVE_SSE2 */ | ||
232 | |||
233 | |||
234 | #ifdef LV_HAVE_GENERIC | ||
235 | |||
236 | 2 | static inline void volk_32f_convert_64f_a_generic(double* outputVector, | |
237 | const float* inputVector, | ||
238 | unsigned int num_points) | ||
239 | { | ||
240 | 2 | double* outputVectorPtr = outputVector; | |
241 | 2 | const float* inputVectorPtr = inputVector; | |
242 | 2 | unsigned int number = 0; | |
243 | |||
244 | 2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times. | 262144 | for (number = 0; number < num_points; number++) { |
245 | 262142 | *outputVectorPtr++ = ((double)(*inputVectorPtr++)); | |
246 | } | ||
247 | 2 | } | |
248 | #endif /* LV_HAVE_GENERIC */ | ||
249 | |||
250 | |||
251 | #endif /* INCLUDED_volk_32f_convert_64f_a_H */ | ||
252 |