GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_64f_x2_max_64f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	67	105	63.8%
Functions:	4	6	66.7%
Branches:	22	34	64.7%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_64f_x2_max_64f
    
       *
    
       * \b Overview
    
       *
    
       * Selects the maximum value of each pair of entries in aVector and bVector
    
       * and stores the results in cVector.
    
       *
    
       * c[i] = max(a[i], b[i])
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_64f_x2_max_64f(double* cVector, const double* aVector, const double* bVector,
    
       * unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: First input vector.
    
       * \li bVector: Second input vector.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *   double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *   double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (double)ii;
    
       *       decreasing[ii] = 10.f - (double)ii;
    
       *   }
    
       *
    
       *   volk_64f_x2_max_64f(out, increasing, decreasing, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %1.2g\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(decreasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_64f_x2_max_64f_a_H
    
      #define INCLUDED_volk_64f_x2_max_64f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
    
                                                       const double* aVector,
    
                                                       const double* bVector,
    
                                                       unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int eigthPoints = num_points / 8;
    
      ✗
          double* cPtr = cVector;
    
      ✗
          const double* aPtr = aVector;
    
      ✗
          const double* bPtr = bVector;
    
          __m512d aVal, bVal, cVal;
    
      ✗
          for (; number < eigthPoints; number++) {
    
      ✗
              aVal = _mm512_load_pd(aPtr);
    
      ✗
              bVal = _mm512_load_pd(bPtr);
    
      ✗
              cVal = _mm512_max_pd(aVal, bVal);
    
              _mm512_store_pd(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 8;
    
      ✗
              bPtr += 8;
    
      ✗
              cPtr += 8;
    
          }
    
      ✗
          number = eigthPoints * 8;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              const double a = *aPtr++;
    
      ✗
              const double b = *bPtr++;
    
      ✗
              *cPtr++ = (a > b ? a : b);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
    
                                                   const double* aVector,
    
                                                   const double* bVector,
    
                                                   unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm256_load_pd(aPtr);
    
      65534
              bVal = _mm256_load_pd(bPtr);
    
      65534
              cVal = _mm256_max_pd(aVal, bVal);
    
              _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              const double a = *aPtr++;
    
      6
              const double b = *bPtr++;
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
              *cPtr++ = (a > b ? a : b);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
    
                                                    const double* aVector,
    
                                                    const double* bVector,
    
                                                    unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int halfPoints = num_points / 2;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m128d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (; number < halfPoints; number++) {
    
      131070
              aVal = _mm_load_pd(aPtr);
    
      131070
              bVal = _mm_load_pd(bPtr);
    
      131070
              cVal = _mm_max_pd(aVal, bVal);
    
              _mm_store_pd(cPtr, cVal); // Store the results back into the C container
    
      131070
              aPtr += 2;
    
      131070
              bPtr += 2;
    
      131070
              cPtr += 2;
    
          }
    
      2
          number = halfPoints * 2;
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.

      4
          for (; number < num_points; number++) {
    
      2
              const double a = *aPtr++;
    
      2
              const double b = *bPtr++;
    
        2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 1 times.

      2
              *cPtr++ = (a > b ? a : b);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_64f_x2_max_64f_generic(double* cVector,
    
                                                     const double* aVector,
    
                                                     const double* bVector,
    
                                                     unsigned int num_points)
    
      {
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              const double a = *aPtr++;
    
      262142
              const double b = *bPtr++;
    
        2/2✓ Branch 0 taken 131024 times.
✓ Branch 1 taken 131118 times.

      262142
              *cPtr++ = (a > b ? a : b);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
    
      #ifndef INCLUDED_volk_64f_x2_max_64f_u_H
    
      #define INCLUDED_volk_64f_x2_max_64f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
    
                                                       const double* aVector,
    
                                                       const double* bVector,
    
                                                       unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int eigthPoints = num_points / 8;
    
      ✗
          double* cPtr = cVector;
    
      ✗
          const double* aPtr = aVector;
    
      ✗
          const double* bPtr = bVector;
    
          __m512d aVal, bVal, cVal;
    
      ✗
          for (; number < eigthPoints; number++) {
    
      ✗
              aVal = _mm512_loadu_pd(aPtr);
    
      ✗
              bVal = _mm512_loadu_pd(bPtr);
    
      ✗
              cVal = _mm512_max_pd(aVal, bVal);
    
              _mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 8;
    
      ✗
              bPtr += 8;
    
      ✗
              cPtr += 8;
    
          }
    
      ✗
          number = eigthPoints * 8;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              const double a = *aPtr++;
    
      ✗
              const double b = *bPtr++;
    
      ✗
              *cPtr++ = (a > b ? a : b);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
    
                                                   const double* aVector,
    
                                                   const double* bVector,
    
                                                   unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm256_loadu_pd(aPtr);
    
      65534
              bVal = _mm256_loadu_pd(bPtr);
    
      65534
              cVal = _mm256_max_pd(aVal, bVal);
    
              _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              const double a = *aPtr++;
    
      6
              const double b = *bPtr++;
    
        2/2✓ Branch 0 taken 3 times.
✓ Branch 1 taken 3 times.

      6
              *cPtr++ = (a > b ? a : b);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_64f_x2_max_64f_u_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_64f_x2_max_64f
12			*
13			* \b Overview
14			*
15			* Selects the maximum value of each pair of entries in aVector and bVector
16			* and stores the results in cVector.
17			*
18			* c[i] = max(a[i], b[i])
19			*
20			* <b>Dispatcher Prototype</b>
21			* \code
22			* void volk_64f_x2_max_64f(double* cVector, const double* aVector, const double* bVector,
23			* unsigned int num_points) \endcode
24			*
25			* \b Inputs
26			* \li aVector: First input vector.
27			* \li bVector: Second input vector.
28			* \li num_points: The number of values in both input vectors.
29			*
30			* \b Outputs
31			* \li cVector: The output vector.
32			*
33			* \b Example
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
38			* double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
39			* double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* increasing[ii] = (double)ii;
43			* decreasing[ii] = 10.f - (double)ii;
44			* }
45			*
46			* volk_64f_x2_max_64f(out, increasing, decreasing, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %1.2g\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(decreasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
59			#define INCLUDED_volk_64f_x2_max_64f_a_H
60
61			#include <inttypes.h>
62			#include <stdio.h>
63
64			#ifdef LV_HAVE_AVX512F
65			#include <immintrin.h>
66
67		✗	static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
68			const double* aVector,
69			const double* bVector,
70			unsigned int num_points)
71			{
72		✗	unsigned int number = 0;
73		✗	const unsigned int eigthPoints = num_points / 8;
74
75		✗	double* cPtr = cVector;
76		✗	const double* aPtr = aVector;
77		✗	const double* bPtr = bVector;
78
79			__m512d aVal, bVal, cVal;
80		✗	for (; number < eigthPoints; number++) {
81
82		✗	aVal = _mm512_load_pd(aPtr);
83		✗	bVal = _mm512_load_pd(bPtr);
84
85		✗	cVal = _mm512_max_pd(aVal, bVal);
86
87			_mm512_store_pd(cPtr, cVal); // Store the results back into the C container
88
89		✗	aPtr += 8;
90		✗	bPtr += 8;
91		✗	cPtr += 8;
92			}
93
94		✗	number = eigthPoints * 8;
95		✗	for (; number < num_points; number++) {
96		✗	const double a = *aPtr++;
97		✗	const double b = *bPtr++;
98		✗	*cPtr++ = (a > b ? a : b);
99			}
100		✗	}
101			#endif /* LV_HAVE_AVX512F */
102
103
104			#ifdef LV_HAVE_AVX
105			#include <immintrin.h>
106
107		2	static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
108			const double* aVector,
109			const double* bVector,
110			unsigned int num_points)
111			{
112		2	unsigned int number = 0;
113		2	const unsigned int quarterPoints = num_points / 4;
114
115		2	double* cPtr = cVector;
116		2	const double* aPtr = aVector;
117		2	const double* bPtr = bVector;
118
119			__m256d aVal, bVal, cVal;
120	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
121
122		65534	aVal = _mm256_load_pd(aPtr);
123		65534	bVal = _mm256_load_pd(bPtr);
124
125		65534	cVal = _mm256_max_pd(aVal, bVal);
126
127			_mm256_store_pd(cPtr, cVal); // Store the results back into the C container
128
129		65534	aPtr += 4;
130		65534	bPtr += 4;
131		65534	cPtr += 4;
132			}
133
134		2	number = quarterPoints * 4;
135	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
136		6	const double a = *aPtr++;
137		6	const double b = *bPtr++;
138	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	*cPtr++ = (a > b ? a : b);
139			}
140		2	}
141			#endif /* LV_HAVE_AVX */
142
143
144			#ifdef LV_HAVE_SSE2
145			#include <emmintrin.h>
146
147		2	static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
148			const double* aVector,
149			const double* bVector,
150			unsigned int num_points)
151			{
152		2	unsigned int number = 0;
153		2	const unsigned int halfPoints = num_points / 2;
154
155		2	double* cPtr = cVector;
156		2	const double* aPtr = aVector;
157		2	const double* bPtr = bVector;
158
159			__m128d aVal, bVal, cVal;
160	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (; number < halfPoints; number++) {
161
162		131070	aVal = _mm_load_pd(aPtr);
163		131070	bVal = _mm_load_pd(bPtr);
164
165		131070	cVal = _mm_max_pd(aVal, bVal);
166
167			_mm_store_pd(cPtr, cVal); // Store the results back into the C container
168
169		131070	aPtr += 2;
170		131070	bPtr += 2;
171		131070	cPtr += 2;
172			}
173
174		2	number = halfPoints * 2;
175	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times.	4	for (; number < num_points; number++) {
176		2	const double a = *aPtr++;
177		2	const double b = *bPtr++;
178	2/2 ✓ Branch 0 taken 1 times. ✓ Branch 1 taken 1 times.	2	*cPtr++ = (a > b ? a : b);
179			}
180		2	}
181			#endif /* LV_HAVE_SSE2 */
182
183
184			#ifdef LV_HAVE_GENERIC
185
186		2	static inline void volk_64f_x2_max_64f_generic(double* cVector,
187			const double* aVector,
188			const double* bVector,
189			unsigned int num_points)
190			{
191		2	double* cPtr = cVector;
192		2	const double* aPtr = aVector;
193		2	const double* bPtr = bVector;
194		2	unsigned int number = 0;
195
196	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
197		262142	const double a = *aPtr++;
198		262142	const double b = *bPtr++;
199	2/2 ✓ Branch 0 taken 131024 times. ✓ Branch 1 taken 131118 times.	262142	*cPtr++ = (a > b ? a : b);
200			}
201		2	}
202			#endif /* LV_HAVE_GENERIC */
203
204
205			#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
206
207
208			#ifndef INCLUDED_volk_64f_x2_max_64f_u_H
209			#define INCLUDED_volk_64f_x2_max_64f_u_H
210
211			#include <inttypes.h>
212			#include <stdio.h>
213
214			#ifdef LV_HAVE_AVX512F
215			#include <immintrin.h>
216
217		✗	static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
218			const double* aVector,
219			const double* bVector,
220			unsigned int num_points)
221			{
222		✗	unsigned int number = 0;
223		✗	const unsigned int eigthPoints = num_points / 8;
224
225		✗	double* cPtr = cVector;
226		✗	const double* aPtr = aVector;
227		✗	const double* bPtr = bVector;
228
229			__m512d aVal, bVal, cVal;
230		✗	for (; number < eigthPoints; number++) {
231
232		✗	aVal = _mm512_loadu_pd(aPtr);
233		✗	bVal = _mm512_loadu_pd(bPtr);
234
235		✗	cVal = _mm512_max_pd(aVal, bVal);
236
237			_mm512_storeu_pd(cPtr, cVal); // Store the results back into the C container
238
239		✗	aPtr += 8;
240		✗	bPtr += 8;
241		✗	cPtr += 8;
242			}
243
244		✗	number = eigthPoints * 8;
245		✗	for (; number < num_points; number++) {
246		✗	const double a = *aPtr++;
247		✗	const double b = *bPtr++;
248		✗	*cPtr++ = (a > b ? a : b);
249			}
250		✗	}
251			#endif /* LV_HAVE_AVX512F */
252
253
254			#ifdef LV_HAVE_AVX
255			#include <immintrin.h>
256
257		2	static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
258			const double* aVector,
259			const double* bVector,
260			unsigned int num_points)
261			{
262		2	unsigned int number = 0;
263		2	const unsigned int quarterPoints = num_points / 4;
264
265		2	double* cPtr = cVector;
266		2	const double* aPtr = aVector;
267		2	const double* bPtr = bVector;
268
269			__m256d aVal, bVal, cVal;
270	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
271
272		65534	aVal = _mm256_loadu_pd(aPtr);
273		65534	bVal = _mm256_loadu_pd(bPtr);
274
275		65534	cVal = _mm256_max_pd(aVal, bVal);
276
277			_mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
278
279		65534	aPtr += 4;
280		65534	bPtr += 4;
281		65534	cPtr += 4;
282			}
283
284		2	number = quarterPoints * 4;
285	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
286		6	const double a = *aPtr++;
287		6	const double b = *bPtr++;
288	2/2 ✓ Branch 0 taken 3 times. ✓ Branch 1 taken 3 times.	6	*cPtr++ = (a > b ? a : b);
289			}
290		2	}
291			#endif /* LV_HAVE_AVX */
292
293
294			#endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
295