GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_64f_x2_max_64f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 67 105 63.8%
Functions: 4 6 66.7%
Branches: 22 34 64.7%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_64f_x2_max_64f
12 *
13 * \b Overview
14 *
15 * Selects the maximum of each pair of corresponding entries in aVector and
16 * bVector and stores the results in cVector.
17 *
18 * c[i] = max(a[i], b[i])
19 *
20 * <b>Dispatcher Prototype</b>
21 * \code
22 * void volk_64f_x2_max_64f(double* cVector, const double* aVector, const double* bVector,
23 * unsigned int num_points) \endcode
24 *
25 * \b Inputs
26 * \li aVector: First input vector.
27 * \li bVector: Second input vector.
28 * \li num_points: The number of values in each input vector.
29 *
30 * \b Outputs
31 * \li cVector: The output vector.
32 *
33 * \b Example
34 * \code
35 * int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
38 * double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
39 * double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
40 *
41 * for(unsigned int ii = 0; ii < N; ++ii){
42 * increasing[ii] = (double)ii;
43 * decreasing[ii] = 10.f - (double)ii;
44 * }
45 *
46 * volk_64f_x2_max_64f(out, increasing, decreasing, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out[%u] = %1.2g\n", ii, out[ii]);
50 * }
51 *
52 * volk_free(increasing);
53 * volk_free(decreasing);
54 * volk_free(out);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_64f_x2_max_64f_a_H
59 #define INCLUDED_volk_64f_x2_max_64f_a_H
60
61 #include <inttypes.h>
62 #include <stdio.h>
63
64 #ifdef LV_HAVE_AVX512F
65 #include <immintrin.h>
66
67 static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
68 const double* aVector,
69 const double* bVector,
70 unsigned int num_points)
71 {
72 unsigned int number = 0;
73 const unsigned int eigthPoints = num_points / 8;
74
75 double* cPtr = cVector;
76 const double* aPtr = aVector;
77 const double* bPtr = bVector;
78
79 __m512d aVal, bVal, cVal;
80 for (; number < eigthPoints; number++) {
81
82 aVal = _mm512_load_pd(aPtr);
83 bVal = _mm512_load_pd(bPtr);
84
85 cVal = _mm512_max_pd(aVal, bVal);
86
87 _mm512_store_pd(cPtr, cVal); // Store the results back into the C container
88
89 aPtr += 8;
90 bPtr += 8;
91 cPtr += 8;
92 }
93
94 number = eigthPoints * 8;
95 for (; number < num_points; number++) {
96 const double a = *aPtr++;
97 const double b = *bPtr++;
98 *cPtr++ = (a > b ? a : b);
99 }
100 }
101 #endif /* LV_HAVE_AVX512F */
102
103
104 #ifdef LV_HAVE_AVX
105 #include <immintrin.h>
106
107 2 static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
108 const double* aVector,
109 const double* bVector,
110 unsigned int num_points)
111 {
112 2 unsigned int number = 0;
113 2 const unsigned int quarterPoints = num_points / 4;
114
115 2 double* cPtr = cVector;
116 2 const double* aPtr = aVector;
117 2 const double* bPtr = bVector;
118
119 __m256d aVal, bVal, cVal;
120
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
121
122 65534 aVal = _mm256_load_pd(aPtr);
123 65534 bVal = _mm256_load_pd(bPtr);
124
125 65534 cVal = _mm256_max_pd(aVal, bVal);
126
127 _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
128
129 65534 aPtr += 4;
130 65534 bPtr += 4;
131 65534 cPtr += 4;
132 }
133
134 2 number = quarterPoints * 4;
135
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
136 6 const double a = *aPtr++;
137 6 const double b = *bPtr++;
138
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 *cPtr++ = (a > b ? a : b);
139 }
140 2 }
141 #endif /* LV_HAVE_AVX */
142
143
144 #ifdef LV_HAVE_SSE2
145 #include <emmintrin.h>
146
147 2 static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
148 const double* aVector,
149 const double* bVector,
150 unsigned int num_points)
151 {
152 2 unsigned int number = 0;
153 2 const unsigned int halfPoints = num_points / 2;
154
155 2 double* cPtr = cVector;
156 2 const double* aPtr = aVector;
157 2 const double* bPtr = bVector;
158
159 __m128d aVal, bVal, cVal;
160
2/2
✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.
131072 for (; number < halfPoints; number++) {
161
162 131070 aVal = _mm_load_pd(aPtr);
163 131070 bVal = _mm_load_pd(bPtr);
164
165 131070 cVal = _mm_max_pd(aVal, bVal);
166
167 _mm_store_pd(cPtr, cVal); // Store the results back into the C container
168
169 131070 aPtr += 2;
170 131070 bPtr += 2;
171 131070 cPtr += 2;
172 }
173
174 2 number = halfPoints * 2;
175
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.
4 for (; number < num_points; number++) {
176 2 const double a = *aPtr++;
177 2 const double b = *bPtr++;
178
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.
2 *cPtr++ = (a > b ? a : b);
179 }
180 2 }
181 #endif /* LV_HAVE_SSE2 */
182
183
184 #ifdef LV_HAVE_GENERIC
185
186 2 static inline void volk_64f_x2_max_64f_generic(double* cVector,
187 const double* aVector,
188 const double* bVector,
189 unsigned int num_points)
190 {
191 2 double* cPtr = cVector;
192 2 const double* aPtr = aVector;
193 2 const double* bPtr = bVector;
194 2 unsigned int number = 0;
195
196
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
197 262142 const double a = *aPtr++;
198 262142 const double b = *bPtr++;
199
2/2
✓ Branch 0 taken 131024 times.
✓ Branch 1 taken 131118 times.
262142 *cPtr++ = (a > b ? a : b);
200 }
201 2 }
202 #endif /* LV_HAVE_GENERIC */
203
204
205 #endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
206
207
208 #ifndef INCLUDED_volk_64f_x2_max_64f_u_H
209 #define INCLUDED_volk_64f_x2_max_64f_u_H
210
211 #include <inttypes.h>
212 #include <stdio.h>
213
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
216
217 static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
218 const double* aVector,
219 const double* bVector,
220 unsigned int num_points)
221 {
222 unsigned int number = 0;
223 const unsigned int eigthPoints = num_points / 8;
224
225 double* cPtr = cVector;
226 const double* aPtr = aVector;
227 const double* bPtr = bVector;
228
229 __m512d aVal, bVal, cVal;
230 for (; number < eigthPoints; number++) {
231
232 aVal = _mm512_loadu_pd(aPtr);
233 bVal = _mm512_loadu_pd(bPtr);
234
235 cVal = _mm512_max_pd(aVal, bVal);
236
237 _mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
238
239 aPtr += 8;
240 bPtr += 8;
241 cPtr += 8;
242 }
243
244 number = eigthPoints * 8;
245 for (; number < num_points; number++) {
246 const double a = *aPtr++;
247 const double b = *bPtr++;
248 *cPtr++ = (a > b ? a : b);
249 }
250 }
251 #endif /* LV_HAVE_AVX512F */
252
253
254 #ifdef LV_HAVE_AVX
255 #include <immintrin.h>
256
257 2 static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
258 const double* aVector,
259 const double* bVector,
260 unsigned int num_points)
261 {
262 2 unsigned int number = 0;
263 2 const unsigned int quarterPoints = num_points / 4;
264
265 2 double* cPtr = cVector;
266 2 const double* aPtr = aVector;
267 2 const double* bPtr = bVector;
268
269 __m256d aVal, bVal, cVal;
270
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
271
272 65534 aVal = _mm256_loadu_pd(aPtr);
273 65534 bVal = _mm256_loadu_pd(bPtr);
274
275 65534 cVal = _mm256_max_pd(aVal, bVal);
276
277 _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
278
279 65534 aPtr += 4;
280 65534 bPtr += 4;
281 65534 cPtr += 4;
282 }
283
284 2 number = quarterPoints * 4;
285
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
286 6 const double a = *aPtr++;
287 6 const double b = *bPtr++;
288
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 *cPtr++ = (a > b ? a : b);
289 }
290 2 }
291 #endif /* LV_HAVE_AVX */
292
293
294 #endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
295