GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_64f_x2_multiply_64f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	76	76	100.0%
Functions:	5	5	100.0%
Branches:	18	18	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2018 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_64f_x2_multiply_64f
    
       *
    
       * \b Overview
    
       *
    
       * Multiplies two input double-precision floating point vectors together.
    
       *
    
       * c[i] = a[i] * b[i]
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_64f_x2_multiply_64f(double* cVector, const double* aVector, const double*
    
       * bVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: First input vector.
    
       * \li bVector: Second input vector.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * Multiply elements of an increasing vector by those of a decreasing vector.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *   double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *   double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *       decreasing[ii] = 10.f - (float)ii;
    
       *   }
    
       *
    
       *   volk_64f_x2_multiply_64f(out, increasing, decreasing, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %1.2F\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(decreasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_64f_x2_multiply_64f_H
    
      #define INCLUDED_volk_64f_x2_multiply_64f_H
    
      #include <inttypes.h>
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_64f_x2_multiply_64f_generic(double* cVector,
    
                                                          const double* aVector,
    
                                                          const double* bVector,
    
                                                          unsigned int num_points)
    
      {
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      /*
    
       * Unaligned versions
    
       */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_64f_x2_multiply_64f_u_sse2(double* cVector,
    
                                                         const double* aVector,
    
                                                         const double* bVector,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int half_points = num_points / 2;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m128d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (; number < half_points; number++) {
    
      131070
              aVal = _mm_loadu_pd(aPtr);
    
      131070
              bVal = _mm_loadu_pd(bPtr);
    
      131070
              cVal = _mm_mul_pd(aVal, bVal);
    
              _mm_storeu_pd(cPtr, cVal); // Store the results back into the C container
    
      131070
              aPtr += 2;
    
      131070
              bPtr += 2;
    
      131070
              cPtr += 2;
    
          }
    
      2
          number = half_points * 2;
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.

      4
          for (; number < num_points; number++) {
    
      2
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_64f_x2_multiply_64f_u_avx(double* cVector,
    
                                                        const double* aVector,
    
                                                        const double* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarter_points = num_points / 4;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarter_points; number++) {
    
      65534
              aVal = _mm256_loadu_pd(aPtr);
    
      65534
              bVal = _mm256_loadu_pd(bPtr);
    
      65534
              cVal = _mm256_mul_pd(aVal, bVal);
    
              _mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarter_points * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      /*
    
       * Aligned versions
    
       */
    
      #ifdef LV_HAVE_SSE2
    
      #include <emmintrin.h>
    
      2
      static inline void volk_64f_x2_multiply_64f_a_sse2(double* cVector,
    
                                                         const double* aVector,
    
                                                         const double* bVector,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int half_points = num_points / 2;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m128d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 131070 times.
✓ Branch 1 taken 2 times.

      131072
          for (; number < half_points; number++) {
    
      131070
              aVal = _mm_load_pd(aPtr);
    
      131070
              bVal = _mm_load_pd(bPtr);
    
      131070
              cVal = _mm_mul_pd(aVal, bVal);
    
              _mm_store_pd(cPtr, cVal); // Store the results back into the C container
    
      131070
              aPtr += 2;
    
      131070
              bPtr += 2;
    
      131070
              cPtr += 2;
    
          }
    
      2
          number = half_points * 2;
    
        2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 2 times.

      4
          for (; number < num_points; number++) {
    
      2
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE2 */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_64f_x2_multiply_64f_a_avx(double* cVector,
    
                                                        const double* aVector,
    
                                                        const double* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarter_points = num_points / 4;
    
      2
          double* cPtr = cVector;
    
      2
          const double* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256d aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarter_points; number++) {
    
      65534
              aVal = _mm256_load_pd(aPtr);
    
      65534
              bVal = _mm256_load_pd(bPtr);
    
      65534
              cVal = _mm256_mul_pd(aVal, bVal);
    
              _mm256_store_pd(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarter_points * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_64f_x2_multiply_64f_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2018 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_64f_x2_multiply_64f
12			*
13			* \b Overview
14			*
15			* Multiplies two input double-precision floating point vectors together.
16			*
17			* c[i] = a[i] * b[i]
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_64f_x2_multiply_64f(double* cVector, const double* aVector, const double*
22			* bVector, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li aVector: First input vector.
26			* \li bVector: Second input vector.
27			* \li num_points: The number of values in both input vectors.
28			*
29			* \b Outputs
30			* \li cVector: The output vector.
31			*
32			* \b Example
33			* Multiply elements of an increasing vector by those of a decreasing vector.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* double* increasing = (double*)volk_malloc(sizeof(double)*N, alignment);
38			* double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
39			* double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* increasing[ii] = (float)ii;
43			* decreasing[ii] = 10.f - (float)ii;
44			* }
45			*
46			* volk_64f_x2_multiply_64f(out, increasing, decreasing, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %1.2F\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(decreasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_64f_x2_multiply_64f_H
59			#define INCLUDED_volk_64f_x2_multiply_64f_H
60
61			#include <inttypes.h>
62
63
64			#ifdef LV_HAVE_GENERIC
65
66		2	static inline void volk_64f_x2_multiply_64f_generic(double* cVector,
67			const double* aVector,
68			const double* bVector,
69			unsigned int num_points)
70			{
71		2	double* cPtr = cVector;
72		2	const double* aPtr = aVector;
73		2	const double* bPtr = bVector;
74		2	unsigned int number = 0;
75
76	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
77		262142	*cPtr++ = (*aPtr++) * (*bPtr++);
78			}
79		2	}
80
81			#endif /* LV_HAVE_GENERIC */
82
83			/*
84			* Unaligned versions
85			*/
86
87			#ifdef LV_HAVE_SSE2
88
89			#include <emmintrin.h>
90
91		2	static inline void volk_64f_x2_multiply_64f_u_sse2(double* cVector,
92			const double* aVector,
93			const double* bVector,
94			unsigned int num_points)
95			{
96		2	unsigned int number = 0;
97		2	const unsigned int half_points = num_points / 2;
98
99		2	double* cPtr = cVector;
100		2	const double* aPtr = aVector;
101		2	const double* bPtr = bVector;
102
103			__m128d aVal, bVal, cVal;
104	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (; number < half_points; number++) {
105		131070	aVal = _mm_loadu_pd(aPtr);
106		131070	bVal = _mm_loadu_pd(bPtr);
107
108		131070	cVal = _mm_mul_pd(aVal, bVal);
109
110			_mm_storeu_pd(cPtr, cVal); // Store the results back into the C container
111
112		131070	aPtr += 2;
113		131070	bPtr += 2;
114		131070	cPtr += 2;
115			}
116
117		2	number = half_points * 2;
118	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times.	4	for (; number < num_points; number++) {
119		2	*cPtr++ = (*aPtr++) * (*bPtr++);
120			}
121		2	}
122
123			#endif /* LV_HAVE_SSE2 */
124
125
126			#ifdef LV_HAVE_AVX
127
128			#include <immintrin.h>
129
130		2	static inline void volk_64f_x2_multiply_64f_u_avx(double* cVector,
131			const double* aVector,
132			const double* bVector,
133			unsigned int num_points)
134			{
135		2	unsigned int number = 0;
136		2	const unsigned int quarter_points = num_points / 4;
137
138		2	double* cPtr = cVector;
139		2	const double* aPtr = aVector;
140		2	const double* bPtr = bVector;
141
142			__m256d aVal, bVal, cVal;
143	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarter_points; number++) {
144
145		65534	aVal = _mm256_loadu_pd(aPtr);
146		65534	bVal = _mm256_loadu_pd(bPtr);
147
148		65534	cVal = _mm256_mul_pd(aVal, bVal);
149
150			_mm256_storeu_pd(cPtr, cVal); // Store the results back into the C container
151
152		65534	aPtr += 4;
153		65534	bPtr += 4;
154		65534	cPtr += 4;
155			}
156
157		2	number = quarter_points * 4;
158	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
159		6	*cPtr++ = (*aPtr++) * (*bPtr++);
160			}
161		2	}
162
163			#endif /* LV_HAVE_AVX */
164
165			/*
166			* Aligned versions
167			*/
168
169			#ifdef LV_HAVE_SSE2
170
171			#include <emmintrin.h>
172
173		2	static inline void volk_64f_x2_multiply_64f_a_sse2(double* cVector,
174			const double* aVector,
175			const double* bVector,
176			unsigned int num_points)
177			{
178		2	unsigned int number = 0;
179		2	const unsigned int half_points = num_points / 2;
180
181		2	double* cPtr = cVector;
182		2	const double* aPtr = aVector;
183		2	const double* bPtr = bVector;
184
185			__m128d aVal, bVal, cVal;
186	2/2 ✓ Branch 0 taken 131070 times. ✓ Branch 1 taken 2 times.	131072	for (; number < half_points; number++) {
187		131070	aVal = _mm_load_pd(aPtr);
188		131070	bVal = _mm_load_pd(bPtr);
189
190		131070	cVal = _mm_mul_pd(aVal, bVal);
191
192			_mm_store_pd(cPtr, cVal); // Store the results back into the C container
193
194		131070	aPtr += 2;
195		131070	bPtr += 2;
196		131070	cPtr += 2;
197			}
198
199		2	number = half_points * 2;
200	2/2 ✓ Branch 0 taken 2 times. ✓ Branch 1 taken 2 times.	4	for (; number < num_points; number++) {
201		2	*cPtr++ = (*aPtr++) * (*bPtr++);
202			}
203		2	}
204
205			#endif /* LV_HAVE_SSE2 */
206
207
208			#ifdef LV_HAVE_AVX
209
210			#include <immintrin.h>
211
212		2	static inline void volk_64f_x2_multiply_64f_a_avx(double* cVector,
213			const double* aVector,
214			const double* bVector,
215			unsigned int num_points)
216			{
217		2	unsigned int number = 0;
218		2	const unsigned int quarter_points = num_points / 4;
219
220		2	double* cPtr = cVector;
221		2	const double* aPtr = aVector;
222		2	const double* bPtr = bVector;
223
224			__m256d aVal, bVal, cVal;
225	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarter_points; number++) {
226
227		65534	aVal = _mm256_load_pd(aPtr);
228		65534	bVal = _mm256_load_pd(bPtr);
229
230		65534	cVal = _mm256_mul_pd(aVal, bVal);
231
232			_mm256_store_pd(cPtr, cVal); // Store the results back into the C container
233
234		65534	aPtr += 4;
235		65534	bPtr += 4;
236		65534	cPtr += 4;
237			}
238
239		2	number = quarter_points * 4;
240	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
241		6	*cPtr++ = (*aPtr++) * (*bPtr++);
242			}
243		2	}
244
245			#endif /* LV_HAVE_AVX */
246
247			#endif /* INCLUDED_volk_64f_x2_multiply_64f_H */
248