GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_convert_64f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 76 76 100.0%
Functions: 6 6 100.0%
Branches: 20 20 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_convert_64f
12 *
13 * \b Overview
14 *
15 * Converts float values into doubles.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_32f_convert_64f(double* outputVector, const float* inputVector, unsigned int
20 * num_points) \endcode
21 *
22 * \b Inputs
23 * \li inputVector: The vector of floats to convert to doubles.
24 * \li num_points: The number of data points.
25 *
26 * \b Outputs
27 * \li outputVector: returns the converted doubles.
28 *
29 * \b Example
30 * Generate floats and convert them to doubles.
31 * \code
32 * unsigned int N = 10;
33 * unsigned int alignment = volk_get_alignment();
34 * float* in = (float*)volk_malloc(sizeof(float)*N, alignment);
35 * double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
36 *
37 * for(unsigned int ii = 0; ii < N; ++ii){
38 * in[ii] = (float)ii;
39 * }
40 *
41 * volk_32f_convert_64f(out, in, N);
42 *
43 * for(unsigned int ii = 0; ii < N; ++ii){
44 * printf("out(%u) = %g\n", ii, out[ii]);
45 * }
46 *
47 * volk_free(in);
48 * volk_free(out);
49 * \endcode
50 */
51
52
53 #ifndef INCLUDED_volk_32f_convert_64f_u_H
54 #define INCLUDED_volk_32f_convert_64f_u_H
55
56 #include <inttypes.h>
57 #include <stdio.h>
58
59 #ifdef LV_HAVE_AVX
60 #include <immintrin.h>
61
62 2 static inline void volk_32f_convert_64f_u_avx(double* outputVector,
63 const float* inputVector,
64 unsigned int num_points)
65 {
66 2 unsigned int number = 0;
67
68 2 const unsigned int quarterPoints = num_points / 4;
69
70 2 const float* inputVectorPtr = (const float*)inputVector;
71 2 double* outputVectorPtr = outputVector;
72 __m256d ret;
73 __m128 inputVal;
74
75
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
76 65534 inputVal = _mm_loadu_ps(inputVectorPtr);
77 65534 inputVectorPtr += 4;
78
79 65534 ret = _mm256_cvtps_pd(inputVal);
80 _mm256_storeu_pd(outputVectorPtr, ret);
81
82 65534 outputVectorPtr += 4;
83 }
84
85 2 number = quarterPoints * 4;
86
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
87 6 outputVector[number] = (double)(inputVector[number]);
88 }
89 2 }
90
91 #endif /* LV_HAVE_AVX */
92
93 #ifdef LV_HAVE_SSE2
94 #include <emmintrin.h>
95
96 2 static inline void volk_32f_convert_64f_u_sse2(double* outputVector,
97 const float* inputVector,
98 unsigned int num_points)
99 {
100 2 unsigned int number = 0;
101
102 2 const unsigned int quarterPoints = num_points / 4;
103
104 2 const float* inputVectorPtr = (const float*)inputVector;
105 2 double* outputVectorPtr = outputVector;
106 __m128d ret;
107 __m128 inputVal;
108
109
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
110 65534 inputVal = _mm_loadu_ps(inputVectorPtr);
111 65534 inputVectorPtr += 4;
112
113 65534 ret = _mm_cvtps_pd(inputVal);
114
115 _mm_storeu_pd(outputVectorPtr, ret);
116 65534 outputVectorPtr += 2;
117
118 65534 inputVal = _mm_movehl_ps(inputVal, inputVal);
119
120 65534 ret = _mm_cvtps_pd(inputVal);
121
122 _mm_storeu_pd(outputVectorPtr, ret);
123 65534 outputVectorPtr += 2;
124 }
125
126 2 number = quarterPoints * 4;
127
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
128 6 outputVector[number] = (double)(inputVector[number]);
129 }
130 2 }
131 #endif /* LV_HAVE_SSE2 */
132
133
134 #ifdef LV_HAVE_GENERIC
135
136 2 static inline void volk_32f_convert_64f_generic(double* outputVector,
137 const float* inputVector,
138 unsigned int num_points)
139 {
140 2 double* outputVectorPtr = outputVector;
141 2 const float* inputVectorPtr = inputVector;
142 2 unsigned int number = 0;
143
144
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
145 262142 *outputVectorPtr++ = ((double)(*inputVectorPtr++));
146 }
147 2 }
148 #endif /* LV_HAVE_GENERIC */
149
150
151 #endif /* INCLUDED_volk_32f_convert_64f_u_H */
152
153
154 #ifndef INCLUDED_volk_32f_convert_64f_a_H
155 #define INCLUDED_volk_32f_convert_64f_a_H
156
157 #include <inttypes.h>
158 #include <stdio.h>
159
160 #ifdef LV_HAVE_AVX
161 #include <immintrin.h>
162
163 2 static inline void volk_32f_convert_64f_a_avx(double* outputVector,
164 const float* inputVector,
165 unsigned int num_points)
166 {
167 2 unsigned int number = 0;
168
169 2 const unsigned int quarterPoints = num_points / 4;
170
171 2 const float* inputVectorPtr = (const float*)inputVector;
172 2 double* outputVectorPtr = outputVector;
173 __m256d ret;
174 __m128 inputVal;
175
176
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
177 65534 inputVal = _mm_load_ps(inputVectorPtr);
178 65534 inputVectorPtr += 4;
179
180 65534 ret = _mm256_cvtps_pd(inputVal);
181 _mm256_store_pd(outputVectorPtr, ret);
182
183 65534 outputVectorPtr += 4;
184 }
185
186 2 number = quarterPoints * 4;
187
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
188 6 outputVector[number] = (double)(inputVector[number]);
189 }
190 2 }
191 #endif /* LV_HAVE_AVX */
192
193 #ifdef LV_HAVE_SSE2
194 #include <emmintrin.h>
195
196 2 static inline void volk_32f_convert_64f_a_sse2(double* outputVector,
197 const float* inputVector,
198 unsigned int num_points)
199 {
200 2 unsigned int number = 0;
201
202 2 const unsigned int quarterPoints = num_points / 4;
203
204 2 const float* inputVectorPtr = (const float*)inputVector;
205 2 double* outputVectorPtr = outputVector;
206 __m128d ret;
207 __m128 inputVal;
208
209
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
210 65534 inputVal = _mm_load_ps(inputVectorPtr);
211 65534 inputVectorPtr += 4;
212
213 65534 ret = _mm_cvtps_pd(inputVal);
214
215 _mm_store_pd(outputVectorPtr, ret);
216 65534 outputVectorPtr += 2;
217
218 65534 inputVal = _mm_movehl_ps(inputVal, inputVal);
219
220 65534 ret = _mm_cvtps_pd(inputVal);
221
222 _mm_store_pd(outputVectorPtr, ret);
223 65534 outputVectorPtr += 2;
224 }
225
226 2 number = quarterPoints * 4;
227
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
228 6 outputVector[number] = (double)(inputVector[number]);
229 }
230 2 }
231 #endif /* LV_HAVE_SSE2 */
232
233
234 #ifdef LV_HAVE_GENERIC
235
236 2 static inline void volk_32f_convert_64f_a_generic(double* outputVector,
237 const float* inputVector,
238 unsigned int num_points)
239 {
240 2 double* outputVectorPtr = outputVector;
241 2 const float* inputVectorPtr = inputVector;
242 2 unsigned int number = 0;
243
244
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
245 262142 *outputVectorPtr++ = ((double)(*inputVectorPtr++));
246 }
247 2 }
248 #endif /* LV_HAVE_GENERIC */
249
250
251 #endif /* INCLUDED_volk_32f_convert_64f_a_H */
252