GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_64f_multiply_64f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	56	56	100.0%
Functions:	3	3	100.0%
Branches:	10	10	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2018 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_64f_multiply_64f
    
       *
    
       * \b Overview
    
       *
    
       * Multiplies a single-precision floating point vector by a double-precision
       * floating point vector, element by element, producing double-precision output.
    
       *
    
       * c[i] = a[i] * b[i]
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_64f_multiply_64f(double* cVector, const float* aVector, const double*
    
       * bVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: First input vector.
    
       * \li bVector: Second input vector.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * Multiply elements of an increasing vector by those of a decreasing vector.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *   double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *       decreasing[ii] = 10.f - (double)ii;
    
       *   }
    
       *
    
       *   volk_32f_64f_multiply_64f(out, increasing, decreasing, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %1.2F\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(decreasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_64f_multiply_64f_H
    
      #define INCLUDED_volk_32f_64f_multiply_64f_H
    
      #include <inttypes.h>
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_64f_multiply_64f_generic(double* cVector,
    
                                                           const float* aVector,
    
                                                           const double* bVector,
    
                                                           unsigned int num_points)
    
      {
    
      2
          double* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      /*
    
       * Unaligned versions
    
       */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_64f_multiply_64f_u_avx(double* cVector,
    
                                                         const float* aVector,
    
                                                         const double* bVector,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighth_points = num_points / 8;
    
      2
          double* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256 aVal;
    
          __m128 aVal1, aVal2;
    
          __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighth_points; number++) {
    
      32766
              aVal = _mm256_loadu_ps(aPtr);
    
      32766
              bVal1 = _mm256_loadu_pd(bPtr);
    
      32766
              bVal2 = _mm256_loadu_pd(bPtr + 4);
    
      32766
              aVal1 = _mm256_extractf128_ps(aVal, 0);
    
      32766
              aVal2 = _mm256_extractf128_ps(aVal, 1);
    
      32766
              aDbl1 = _mm256_cvtps_pd(aVal1);
    
      32766
              aDbl2 = _mm256_cvtps_pd(aVal2);
    
      32766
              cVal1 = _mm256_mul_pd(aDbl1, bVal1);
    
      32766
              cVal2 = _mm256_mul_pd(aDbl2, bVal2);
    
              _mm256_storeu_pd(cPtr, cVal1);     // Store the results back into the C container
    
      32766
              _mm256_storeu_pd(cPtr + 4, cVal2); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighth_points * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_64f_multiply_64f_a_avx(double* cVector,
    
                                                         const float* aVector,
    
                                                         const double* bVector,
    
                                                         unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighth_points = num_points / 8;
    
      2
          double* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const double* bPtr = bVector;
    
          __m256 aVal;
    
          __m128 aVal1, aVal2;
    
          __m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighth_points; number++) {
    
      32766
              aVal = _mm256_load_ps(aPtr);
    
      32766
              bVal1 = _mm256_load_pd(bPtr);
    
      32766
              bVal2 = _mm256_load_pd(bPtr + 4);
    
      32766
              aVal1 = _mm256_extractf128_ps(aVal, 0);
    
      32766
              aVal2 = _mm256_extractf128_ps(aVal, 1);
    
      32766
              aDbl1 = _mm256_cvtps_pd(aVal1);
    
      32766
              aDbl2 = _mm256_cvtps_pd(aVal2);
    
      32766
              cVal1 = _mm256_mul_pd(aDbl1, bVal1);
    
      32766
              cVal2 = _mm256_mul_pd(aDbl2, bVal2);
    
              _mm256_store_pd(cPtr, cVal1);     // Store the results back into the C container
    
      32766
              _mm256_store_pd(cPtr + 4, cVal2); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighth_points * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #endif /* INCLUDED_volk_32f_64f_multiply_64f_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2018 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_64f_multiply_64f
12			*
13			* \b Overview
14			*
15			* Multiplies a single-precision floating point vector by a double-precision floating point vector, element by element, producing double-precision output.
16			*
17			* c[i] = a[i] * b[i]
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_32f_64f_multiply_64f(double* cVector, const float* aVector, const double*
22			* bVector, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li aVector: First input vector.
26			* \li bVector: Second input vector.
27			* \li num_points: The number of values in both input vectors.
28			*
29			* \b Outputs
30			* \li cVector: The output vector.
31			*
32			* \b Example
33			* Multiply elements of an increasing vector by those of a decreasing vector.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
38			* double* decreasing = (double*)volk_malloc(sizeof(double)*N, alignment);
39			* double* out = (double*)volk_malloc(sizeof(double)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* increasing[ii] = (float)ii;
43			* decreasing[ii] = 10.f - (double)ii;
44			* }
45			*
46			* volk_32f_64f_multiply_64f(out, increasing, decreasing, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %1.2F\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(decreasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32f_64f_multiply_64f_H
59			#define INCLUDED_volk_32f_64f_multiply_64f_H
60
61			#include <inttypes.h>
62
63
64			#ifdef LV_HAVE_GENERIC
65
66		2	static inline void volk_32f_64f_multiply_64f_generic(double* cVector,
67			const float* aVector,
68			const double* bVector,
69			unsigned int num_points)
70			{
71		2	double* cPtr = cVector;
72		2	const float* aPtr = aVector;
73		2	const double* bPtr = bVector;
74		2	unsigned int number = 0;
75
76	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
77		262142	*cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
78			}
79		2	}
80
81			#endif /* LV_HAVE_GENERIC */
82
83			/*
84			* Unaligned versions
85			*/
86
87
88			#ifdef LV_HAVE_AVX
89
90			#include <immintrin.h>
91			#include <xmmintrin.h>
92
93		2	static inline void volk_32f_64f_multiply_64f_u_avx(double* cVector,
94			const float* aVector,
95			const double* bVector,
96			unsigned int num_points)
97			{
98		2	unsigned int number = 0;
99		2	const unsigned int eighth_points = num_points / 8;
100
101		2	double* cPtr = cVector;
102		2	const float* aPtr = aVector;
103		2	const double* bPtr = bVector;
104
105			__m256 aVal;
106			__m128 aVal1, aVal2;
107			__m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
108	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighth_points; number++) {
109
110		32766	aVal = _mm256_loadu_ps(aPtr);
111		32766	bVal1 = _mm256_loadu_pd(bPtr);
112		32766	bVal2 = _mm256_loadu_pd(bPtr + 4);
113
114		32766	aVal1 = _mm256_extractf128_ps(aVal, 0);
115		32766	aVal2 = _mm256_extractf128_ps(aVal, 1);
116
117		32766	aDbl1 = _mm256_cvtps_pd(aVal1);
118		32766	aDbl2 = _mm256_cvtps_pd(aVal2);
119
120		32766	cVal1 = _mm256_mul_pd(aDbl1, bVal1);
121		32766	cVal2 = _mm256_mul_pd(aDbl2, bVal2);
122
123			_mm256_storeu_pd(cPtr, cVal1); // Store the results back into the C container
124		32766	_mm256_storeu_pd(cPtr + 4, cVal2); // Store the results back into the C container
125
126		32766	aPtr += 8;
127		32766	bPtr += 8;
128		32766	cPtr += 8;
129			}
130
131		2	number = eighth_points * 8;
132	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
133		14	*cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
134			}
135		2	}
136
137			#endif /* LV_HAVE_AVX */
138
139
140			#ifdef LV_HAVE_AVX
141
142			#include <immintrin.h>
143			#include <xmmintrin.h>
144
145		2	static inline void volk_32f_64f_multiply_64f_a_avx(double* cVector,
146			const float* aVector,
147			const double* bVector,
148			unsigned int num_points)
149			{
150		2	unsigned int number = 0;
151		2	const unsigned int eighth_points = num_points / 8;
152
153		2	double* cPtr = cVector;
154		2	const float* aPtr = aVector;
155		2	const double* bPtr = bVector;
156
157			__m256 aVal;
158			__m128 aVal1, aVal2;
159			__m256d aDbl1, aDbl2, bVal1, bVal2, cVal1, cVal2;
160	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighth_points; number++) {
161
162		32766	aVal = _mm256_load_ps(aPtr);
163		32766	bVal1 = _mm256_load_pd(bPtr);
164		32766	bVal2 = _mm256_load_pd(bPtr + 4);
165
166		32766	aVal1 = _mm256_extractf128_ps(aVal, 0);
167		32766	aVal2 = _mm256_extractf128_ps(aVal, 1);
168
169		32766	aDbl1 = _mm256_cvtps_pd(aVal1);
170		32766	aDbl2 = _mm256_cvtps_pd(aVal2);
171
172		32766	cVal1 = _mm256_mul_pd(aDbl1, bVal1);
173		32766	cVal2 = _mm256_mul_pd(aDbl2, bVal2);
174
175			_mm256_store_pd(cPtr, cVal1); // Store the results back into the C container
176		32766	_mm256_store_pd(cPtr + 4, cVal2); // Store the results back into the C container
177
178		32766	aPtr += 8;
179		32766	bPtr += 8;
180		32766	cPtr += 8;
181			}
182
183		2	number = eighth_points * 8;
184	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
185		14	*cPtr++ = ((double)(*aPtr++)) * (*bPtr++);
186			}
187		2	}
188
189			#endif /* LV_HAVE_AVX */
190
191
192			#endif /* INCLUDED_volk_32f_64f_multiply_64f_H */
193