GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_x2_min_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 70 108 64.8%
Functions: 5 7 71.4%
Branches: 22 34 64.7%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_x2_min_32f
12 *
13 * \b Overview
14 *
15 * Selects the minimum value of each pair of entries from aVector and bVector
16 * and stores the results in cVector.
17 *
18 * c[i] = min(a[i], b[i])
19 *
20 * <b>Dispatcher Prototype</b>
21 * \code
22 * void volk_32f_x2_min_32f(float* cVector, const float* aVector, const float* bVector,
23 * unsigned int num_points) \endcode
24 *
25 * \b Inputs
26 * \li aVector: First input vector.
27 * \li bVector: Second input vector.
28 * \li num_points: The number of values in both input vectors.
29 *
30 * \b Outputs
31 * \li cVector: The output vector.
32 *
33 * \b Example
34 * \code
35 * unsigned int N = 10;
36 * unsigned int alignment = volk_get_alignment();
37 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
38 * float* decreasing = (float*)volk_malloc(sizeof(float)*N, alignment);
39 * float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
40 *
41 * for(unsigned int ii = 0; ii < N; ++ii){
42 * increasing[ii] = (float)ii;
43 * decreasing[ii] = 10.f - (float)ii;
44 * }
45 *
46 * volk_32f_x2_min_32f(out, increasing, decreasing, N);
47 *
48 * for(unsigned int ii = 0; ii < N; ++ii){
49 * printf("out[%u] = %1.2f\n", ii, out[ii]);
50 * }
51 *
52 * volk_free(increasing);
53 * volk_free(decreasing);
54 * volk_free(out);
55 * \endcode
56 */
57
58 #ifndef INCLUDED_volk_32f_x2_min_32f_a_H
59 #define INCLUDED_volk_32f_x2_min_32f_a_H
60
61 #include <inttypes.h>
62 #include <stdio.h>
63
64 #ifdef LV_HAVE_SSE
65 #include <xmmintrin.h>
66
67 2 static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
68 const float* aVector,
69 const float* bVector,
70 unsigned int num_points)
71 {
72 2 unsigned int number = 0;
73 2 const unsigned int quarterPoints = num_points / 4;
74
75 2 float* cPtr = cVector;
76 2 const float* aPtr = aVector;
77 2 const float* bPtr = bVector;
78
79 __m128 aVal, bVal, cVal;
80
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
81 65534 aVal = _mm_load_ps(aPtr);
82 65534 bVal = _mm_load_ps(bPtr);
83
84 65534 cVal = _mm_min_ps(aVal, bVal);
85
86 _mm_store_ps(cPtr, cVal); // Store the results back into the C container
87
88 65534 aPtr += 4;
89 65534 bPtr += 4;
90 65534 cPtr += 4;
91 }
92
93 2 number = quarterPoints * 4;
94
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
95 6 const float a = *aPtr++;
96 6 const float b = *bPtr++;
97
2/2
✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.
6 *cPtr++ = (a < b ? a : b);
98 }
99 2 }
100 #endif /* LV_HAVE_SSE */
101
102
103 #ifdef LV_HAVE_NEON
104 #include <arm_neon.h>
105
106 static inline void volk_32f_x2_min_32f_neon(float* cVector,
107 const float* aVector,
108 const float* bVector,
109 unsigned int num_points)
110 {
111 float* cPtr = cVector;
112 const float* aPtr = aVector;
113 const float* bPtr = bVector;
114 unsigned int number = 0;
115 unsigned int quarter_points = num_points / 4;
116
117 float32x4_t a_vec, b_vec, c_vec;
118 for (number = 0; number < quarter_points; number++) {
119 a_vec = vld1q_f32(aPtr);
120 b_vec = vld1q_f32(bPtr);
121
122 c_vec = vminq_f32(a_vec, b_vec);
123
124 vst1q_f32(cPtr, c_vec);
125 aPtr += 4;
126 bPtr += 4;
127 cPtr += 4;
128 }
129
130 for (number = quarter_points * 4; number < num_points; number++) {
131 const float a = *aPtr++;
132 const float b = *bPtr++;
133 *cPtr++ = (a < b ? a : b);
134 }
135 }
136 #endif /* LV_HAVE_NEON */
137
138
139 #ifdef LV_HAVE_GENERIC
140
141 2 static inline void volk_32f_x2_min_32f_generic(float* cVector,
142 const float* aVector,
143 const float* bVector,
144 unsigned int num_points)
145 {
146 2 float* cPtr = cVector;
147 2 const float* aPtr = aVector;
148 2 const float* bPtr = bVector;
149 2 unsigned int number = 0;
150
151
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (number = 0; number < num_points; number++) {
152 262142 const float a = *aPtr++;
153 262142 const float b = *bPtr++;
154
2/2
✓ Branch 0 taken 131431 times.
✓ Branch 1 taken 130711 times.
262142 *cPtr++ = (a < b ? a : b);
155 }
156 2 }
157 #endif /* LV_HAVE_GENERIC */
158
159
160 #ifdef LV_HAVE_ORC
161
162 extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector,
163 const float* aVector,
164 const float* bVector,
165 unsigned int num_points);
166
167 2 static inline void volk_32f_x2_min_32f_u_orc(float* cVector,
168 const float* aVector,
169 const float* bVector,
170 unsigned int num_points)
171 {
172 2 volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
173 2 }
174 #endif /* LV_HAVE_ORC */
175
176 #ifdef LV_HAVE_AVX
177 #include <immintrin.h>
178
179 2 static inline void volk_32f_x2_min_32f_a_avx(float* cVector,
180 const float* aVector,
181 const float* bVector,
182 unsigned int num_points)
183 {
184 2 unsigned int number = 0;
185 2 const unsigned int eighthPoints = num_points / 8;
186
187 2 float* cPtr = cVector;
188 2 const float* aPtr = aVector;
189 2 const float* bPtr = bVector;
190
191 __m256 aVal, bVal, cVal;
192
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
193 32766 aVal = _mm256_load_ps(aPtr);
194 32766 bVal = _mm256_load_ps(bPtr);
195
196 32766 cVal = _mm256_min_ps(aVal, bVal);
197
198 _mm256_store_ps(cPtr, cVal); // Store the results back into the C container
199
200 32766 aPtr += 8;
201 32766 bPtr += 8;
202 32766 cPtr += 8;
203 }
204
205 2 number = eighthPoints * 8;
206
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
207 14 const float a = *aPtr++;
208 14 const float b = *bPtr++;
209
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 8 times.
14 *cPtr++ = (a < b ? a : b);
210 }
211 2 }
212 #endif /* LV_HAVE_AVX */
213
214 #ifdef LV_HAVE_AVX512F
215 #include <immintrin.h>
216
217 static inline void volk_32f_x2_min_32f_a_avx512f(float* cVector,
218 const float* aVector,
219 const float* bVector,
220 unsigned int num_points)
221 {
222 unsigned int number = 0;
223 const unsigned int sixteenthPoints = num_points / 16;
224
225 float* cPtr = cVector;
226 const float* aPtr = aVector;
227 const float* bPtr = bVector;
228
229 __m512 aVal, bVal, cVal;
230 for (; number < sixteenthPoints; number++) {
231 aVal = _mm512_load_ps(aPtr);
232 bVal = _mm512_load_ps(bPtr);
233
234 cVal = _mm512_min_ps(aVal, bVal);
235
236 _mm512_store_ps(cPtr, cVal); // Store the results back into the C container
237
238 aPtr += 16;
239 bPtr += 16;
240 cPtr += 16;
241 }
242
243 number = sixteenthPoints * 16;
244 for (; number < num_points; number++) {
245 const float a = *aPtr++;
246 const float b = *bPtr++;
247 *cPtr++ = (a < b ? a : b);
248 }
249 }
250 #endif /* LV_HAVE_AVX512F */
251
252 #endif /* INCLUDED_volk_32f_x2_min_32f_a_H */
253
254
255 #ifndef INCLUDED_volk_32f_x2_min_32f_u_H
256 #define INCLUDED_volk_32f_x2_min_32f_u_H
257
258 #include <inttypes.h>
259 #include <stdio.h>
260
261 #ifdef LV_HAVE_AVX512F
262 #include <immintrin.h>
263
264 static inline void volk_32f_x2_min_32f_u_avx512f(float* cVector,
265 const float* aVector,
266 const float* bVector,
267 unsigned int num_points)
268 {
269 unsigned int number = 0;
270 const unsigned int sixteenthPoints = num_points / 16;
271
272 float* cPtr = cVector;
273 const float* aPtr = aVector;
274 const float* bPtr = bVector;
275
276 __m512 aVal, bVal, cVal;
277 for (; number < sixteenthPoints; number++) {
278 aVal = _mm512_loadu_ps(aPtr);
279 bVal = _mm512_loadu_ps(bPtr);
280
281 cVal = _mm512_min_ps(aVal, bVal);
282
283 _mm512_storeu_ps(cPtr, cVal); // Store the results back into the C container
284
285 aPtr += 16;
286 bPtr += 16;
287 cPtr += 16;
288 }
289
290 number = sixteenthPoints * 16;
291 for (; number < num_points; number++) {
292 const float a = *aPtr++;
293 const float b = *bPtr++;
294 *cPtr++ = (a < b ? a : b);
295 }
296 }
297 #endif /* LV_HAVE_AVX512F */
298
299 #ifdef LV_HAVE_AVX
300 #include <immintrin.h>
301
302 2 static inline void volk_32f_x2_min_32f_u_avx(float* cVector,
303 const float* aVector,
304 const float* bVector,
305 unsigned int num_points)
306 {
307 2 unsigned int number = 0;
308 2 const unsigned int eighthPoints = num_points / 8;
309
310 2 float* cPtr = cVector;
311 2 const float* aPtr = aVector;
312 2 const float* bPtr = bVector;
313
314 __m256 aVal, bVal, cVal;
315
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
316 32766 aVal = _mm256_loadu_ps(aPtr);
317 32766 bVal = _mm256_loadu_ps(bPtr);
318
319 32766 cVal = _mm256_min_ps(aVal, bVal);
320
321 _mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
322
323 32766 aPtr += 8;
324 32766 bPtr += 8;
325 32766 cPtr += 8;
326 }
327
328 2 number = eighthPoints * 8;
329
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
330 14 const float a = *aPtr++;
331 14 const float b = *bPtr++;
332
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 8 times.
14 *cPtr++ = (a < b ? a : b);
333 }
334 2 }
335 #endif /* LV_HAVE_AVX */
336
337 #endif /* INCLUDED_volk_32f_x2_min_32f_u_H */
338