GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_x2_multiply_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	87	121	71.9%
Functions:	7	9	77.8%
Branches:	20	28	71.4%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_x2_multiply_32f
    
       *
    
       * \b Overview
    
       *
    
       * Multiplies two input floating point vectors together.
    
       *
    
       * c[i] = a[i] * b[i]
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_x2_multiply_32f(float* cVector, const float* aVector, const float*
    
       * bVector, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: First input vector.
    
       * \li bVector: Second input vector.
    
       * \li num_points: The number of values in both input vectors.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector.
    
       *
    
       * \b Example
    
       * Multiply elements of an increasing vector by those of a decreasing vector.
    
       * \code
    
       *   int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* decreasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *       decreasing[ii] = 10.f - (float)ii;
    
       *   }
    
       *
    
       *   volk_32f_x2_multiply_32f(out, increasing, decreasing, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %1.2f\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(decreasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
    
      #define INCLUDED_volk_32f_x2_multiply_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m128 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_loadu_ps(aPtr);
    
      65534
              bVal = _mm_loadu_ps(bPtr);
    
      65534
              cVal = _mm_mul_ps(aVal, bVal);
    
              _mm_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32f_x2_multiply_32f_u_avx512f(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float* bVector,
    
                                                            unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          float* cPtr = cVector;
    
      ✗
          const float* aPtr = aVector;
    
      ✗
          const float* bPtr = bVector;
    
          __m512 aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_loadu_ps(aPtr);
    
      ✗
              bVal = _mm512_loadu_ps(bPtr);
    
      ✗
              cVal = _mm512_mul_ps(aVal, bVal);
    
              _mm512_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m256 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_loadu_ps(aPtr);
    
      32766
              bVal = _mm256_loadu_ps(bPtr);
    
      32766
              cVal = _mm256_mul_ps(aVal, bVal);
    
              _mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_x2_multiply_32f_generic(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float* bVector,
    
                                                          unsigned int num_points)
    
      {
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
    
      #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
    
      #define INCLUDED_volk_32f_x2_multiply_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m128 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_load_ps(aPtr);
    
      65534
              bVal = _mm_load_ps(bPtr);
    
      65534
              cVal = _mm_mul_ps(aVal, bVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              bPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_AVX512F
    
      #include <immintrin.h>
    
      ✗
      static inline void volk_32f_x2_multiply_32f_a_avx512f(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float* bVector,
    
                                                            unsigned int num_points)
    
      {
    
      ✗
          unsigned int number = 0;
    
      ✗
          const unsigned int sixteenthPoints = num_points / 16;
    
      ✗
          float* cPtr = cVector;
    
      ✗
          const float* aPtr = aVector;
    
      ✗
          const float* bPtr = bVector;
    
          __m512 aVal, bVal, cVal;
    
      ✗
          for (; number < sixteenthPoints; number++) {
    
      ✗
              aVal = _mm512_load_ps(aPtr);
    
      ✗
              bVal = _mm512_load_ps(bPtr);
    
      ✗
              cVal = _mm512_mul_ps(aVal, bVal);
    
              _mm512_store_ps(cPtr, cVal); // Store the results back into the C container
    
      ✗
              aPtr += 16;
    
      ✗
              bPtr += 16;
    
      ✗
              cPtr += 16;
    
          }
    
      ✗
          number = sixteenthPoints * 16;
    
      ✗
          for (; number < num_points; number++) {
    
      ✗
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      ✗
      }
    
      #endif /* LV_HAVE_AVX512F */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
          __m256 aVal, bVal, cVal;
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_load_ps(aPtr);
    
      32766
              bVal = _mm256_load_ps(bPtr);
    
      32766
              cVal = _mm256_mul_ps(aVal, bVal);
    
              _mm256_store_ps(cPtr, cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              bPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32f_x2_multiply_32f_neon(float* cVector,
    
                                                       const float* aVector,
    
                                                       const float* bVector,
    
                                                       unsigned int num_points)
    
      {
    
          const unsigned int quarter_points = num_points / 4;
    
          unsigned int number;
    
          float32x4_t avec, bvec, cvec;
    
          for (number = 0; number < quarter_points; ++number) {
    
              avec = vld1q_f32(aVector);
    
              bvec = vld1q_f32(bVector);
    
              cvec = vmulq_f32(avec, bvec);
    
              vst1q_f32(cVector, cvec);
    
              aVector += 4;
    
              bVector += 4;
    
              cVector += 4;
    
          }
    
          for (number = quarter_points * 4; number < num_points; ++number) {
    
              *cVector++ = *aVector++ * *bVector++;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float* bVector,
    
                                                            unsigned int num_points)
    
      {
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
      2
          const float* bPtr = bVector;
    
      2
          unsigned int number = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *cPtr++ = (*aPtr++) * (*bPtr++);
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_ORC
    
      extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector,
    
                                                      const float* aVector,
    
                                                      const float* bVector,
    
                                                      unsigned int num_points);
    
      2
      static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float* bVector,
    
                                                        unsigned int num_points)
    
      {
    
      2
          volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_ORC */
    
      #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_x2_multiply_32f
12			*
13			* \b Overview
14			*
15			* Multiplies two input floating point vectors together.
16			*
17			* c[i] = a[i] * b[i]
18			*
19			* <b>Dispatcher Prototype</b>
20			* \code
21			* void volk_32f_x2_multiply_32f(float* cVector, const float* aVector, const float*
22			* bVector, unsigned int num_points) \endcode
23			*
24			* \b Inputs
25			* \li aVector: First input vector.
26			* \li bVector: Second input vector.
27			* \li num_points: The number of values in both input vectors.
28			*
29			* \b Outputs
30			* \li cVector: The output vector.
31			*
32			* \b Example
33			* Multiply elements of an increasing vector by those of a decreasing vector.
34			* \code
35			* int N = 10;
36			* unsigned int alignment = volk_get_alignment();
37			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
38			* float* decreasing = (float*)volk_malloc(sizeof(float)*N, alignment);
39			* float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
40			*
41			* for(unsigned int ii = 0; ii < N; ++ii){
42			* increasing[ii] = (float)ii;
43			* decreasing[ii] = 10.f - (float)ii;
44			* }
45			*
46			* volk_32f_x2_multiply_32f(out, increasing, decreasing, N);
47			*
48			* for(unsigned int ii = 0; ii < N; ++ii){
49			* printf("out[%u] = %1.2f\n", ii, out[ii]);
50			* }
51			*
52			* volk_free(increasing);
53			* volk_free(decreasing);
54			* volk_free(out);
55			* \endcode
56			*/
57
58			#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
59			#define INCLUDED_volk_32f_x2_multiply_32f_u_H
60
61			#include <inttypes.h>
62			#include <stdio.h>
63
64			#ifdef LV_HAVE_SSE
65			#include <xmmintrin.h>
66
67		2	static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector,
68			const float* aVector,
69			const float* bVector,
70			unsigned int num_points)
71			{
72		2	unsigned int number = 0;
73		2	const unsigned int quarterPoints = num_points / 4;
74
75		2	float* cPtr = cVector;
76		2	const float* aPtr = aVector;
77		2	const float* bPtr = bVector;
78
79			__m128 aVal, bVal, cVal;
80	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
81
82		65534	aVal = _mm_loadu_ps(aPtr);
83		65534	bVal = _mm_loadu_ps(bPtr);
84
85		65534	cVal = _mm_mul_ps(aVal, bVal);
86
87			_mm_storeu_ps(cPtr, cVal); // Store the results back into the C container
88
89		65534	aPtr += 4;
90		65534	bPtr += 4;
91		65534	cPtr += 4;
92			}
93
94		2	number = quarterPoints * 4;
95	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
96		6	*cPtr++ = (*aPtr++) * (*bPtr++);
97			}
98		2	}
99			#endif /* LV_HAVE_SSE */
100
101			#ifdef LV_HAVE_AVX512F
102			#include <immintrin.h>
103
104		✗	static inline void volk_32f_x2_multiply_32f_u_avx512f(float* cVector,
105			const float* aVector,
106			const float* bVector,
107			unsigned int num_points)
108			{
109		✗	unsigned int number = 0;
110		✗	const unsigned int sixteenthPoints = num_points / 16;
111
112		✗	float* cPtr = cVector;
113		✗	const float* aPtr = aVector;
114		✗	const float* bPtr = bVector;
115
116			__m512 aVal, bVal, cVal;
117		✗	for (; number < sixteenthPoints; number++) {
118
119		✗	aVal = _mm512_loadu_ps(aPtr);
120		✗	bVal = _mm512_loadu_ps(bPtr);
121
122		✗	cVal = _mm512_mul_ps(aVal, bVal);
123
124			_mm512_storeu_ps(cPtr, cVal); // Store the results back into the C container
125
126		✗	aPtr += 16;
127		✗	bPtr += 16;
128		✗	cPtr += 16;
129			}
130
131		✗	number = sixteenthPoints * 16;
132		✗	for (; number < num_points; number++) {
133		✗	*cPtr++ = (*aPtr++) * (*bPtr++);
134			}
135		✗	}
136			#endif /* LV_HAVE_AVX512F */
137
138			#ifdef LV_HAVE_AVX
139			#include <immintrin.h>
140
141		2	static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector,
142			const float* aVector,
143			const float* bVector,
144			unsigned int num_points)
145			{
146		2	unsigned int number = 0;
147		2	const unsigned int eighthPoints = num_points / 8;
148
149		2	float* cPtr = cVector;
150		2	const float* aPtr = aVector;
151		2	const float* bPtr = bVector;
152
153			__m256 aVal, bVal, cVal;
154	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
155
156		32766	aVal = _mm256_loadu_ps(aPtr);
157		32766	bVal = _mm256_loadu_ps(bPtr);
158
159		32766	cVal = _mm256_mul_ps(aVal, bVal);
160
161			_mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
162
163		32766	aPtr += 8;
164		32766	bPtr += 8;
165		32766	cPtr += 8;
166			}
167
168		2	number = eighthPoints * 8;
169	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
170		14	*cPtr++ = (*aPtr++) * (*bPtr++);
171			}
172		2	}
173			#endif /* LV_HAVE_AVX */
174
175
176			#ifdef LV_HAVE_GENERIC
177
178		2	static inline void volk_32f_x2_multiply_32f_generic(float* cVector,
179			const float* aVector,
180			const float* bVector,
181			unsigned int num_points)
182			{
183		2	float* cPtr = cVector;
184		2	const float* aPtr = aVector;
185		2	const float* bPtr = bVector;
186		2	unsigned int number = 0;
187
188	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
189		262142	*cPtr++ = (*aPtr++) * (*bPtr++);
190			}
191		2	}
192			#endif /* LV_HAVE_GENERIC */
193
194
195			#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
196
197
198			#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
199			#define INCLUDED_volk_32f_x2_multiply_32f_a_H
200
201			#include <inttypes.h>
202			#include <stdio.h>
203
204			#ifdef LV_HAVE_SSE
205			#include <xmmintrin.h>
206
207		2	static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector,
208			const float* aVector,
209			const float* bVector,
210			unsigned int num_points)
211			{
212		2	unsigned int number = 0;
213		2	const unsigned int quarterPoints = num_points / 4;
214
215		2	float* cPtr = cVector;
216		2	const float* aPtr = aVector;
217		2	const float* bPtr = bVector;
218
219			__m128 aVal, bVal, cVal;
220	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
221
222		65534	aVal = _mm_load_ps(aPtr);
223		65534	bVal = _mm_load_ps(bPtr);
224
225		65534	cVal = _mm_mul_ps(aVal, bVal);
226
227			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
228
229		65534	aPtr += 4;
230		65534	bPtr += 4;
231		65534	cPtr += 4;
232			}
233
234		2	number = quarterPoints * 4;
235	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
236		6	*cPtr++ = (*aPtr++) * (*bPtr++);
237			}
238		2	}
239			#endif /* LV_HAVE_SSE */
240
241			#ifdef LV_HAVE_AVX512F
242			#include <immintrin.h>
243
244		✗	static inline void volk_32f_x2_multiply_32f_a_avx512f(float* cVector,
245			const float* aVector,
246			const float* bVector,
247			unsigned int num_points)
248			{
249		✗	unsigned int number = 0;
250		✗	const unsigned int sixteenthPoints = num_points / 16;
251
252		✗	float* cPtr = cVector;
253		✗	const float* aPtr = aVector;
254		✗	const float* bPtr = bVector;
255
256			__m512 aVal, bVal, cVal;
257		✗	for (; number < sixteenthPoints; number++) {
258
259		✗	aVal = _mm512_load_ps(aPtr);
260		✗	bVal = _mm512_load_ps(bPtr);
261
262		✗	cVal = _mm512_mul_ps(aVal, bVal);
263
264			_mm512_store_ps(cPtr, cVal); // Store the results back into the C container
265
266		✗	aPtr += 16;
267		✗	bPtr += 16;
268		✗	cPtr += 16;
269			}
270
271		✗	number = sixteenthPoints * 16;
272		✗	for (; number < num_points; number++) {
273		✗	*cPtr++ = (*aPtr++) * (*bPtr++);
274			}
275		✗	}
276			#endif /* LV_HAVE_AVX512F */
277
278
279			#ifdef LV_HAVE_AVX
280			#include <immintrin.h>
281
282		2	static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector,
283			const float* aVector,
284			const float* bVector,
285			unsigned int num_points)
286			{
287		2	unsigned int number = 0;
288		2	const unsigned int eighthPoints = num_points / 8;
289
290		2	float* cPtr = cVector;
291		2	const float* aPtr = aVector;
292		2	const float* bPtr = bVector;
293
294			__m256 aVal, bVal, cVal;
295	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
296
297		32766	aVal = _mm256_load_ps(aPtr);
298		32766	bVal = _mm256_load_ps(bPtr);
299
300		32766	cVal = _mm256_mul_ps(aVal, bVal);
301
302			_mm256_store_ps(cPtr, cVal); // Store the results back into the C container
303
304		32766	aPtr += 8;
305		32766	bPtr += 8;
306		32766	cPtr += 8;
307			}
308
309		2	number = eighthPoints * 8;
310	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
311		14	*cPtr++ = (*aPtr++) * (*bPtr++);
312			}
313		2	}
314			#endif /* LV_HAVE_AVX */
315
316
317			#ifdef LV_HAVE_NEON
318			#include <arm_neon.h>
319
320			static inline void volk_32f_x2_multiply_32f_neon(float* cVector,
321			const float* aVector,
322			const float* bVector,
323			unsigned int num_points)
324			{
325			const unsigned int quarter_points = num_points / 4;
326			unsigned int number;
327			float32x4_t avec, bvec, cvec;
328			for (number = 0; number < quarter_points; ++number) {
329			avec = vld1q_f32(aVector);
330			bvec = vld1q_f32(bVector);
331			cvec = vmulq_f32(avec, bvec);
332			vst1q_f32(cVector, cvec);
333			aVector += 4;
334			bVector += 4;
335			cVector += 4;
336			}
337			for (number = quarter_points * 4; number < num_points; ++number) {
338			*cVector++ = *aVector++ * *bVector++;
339			}
340			}
341			#endif /* LV_HAVE_NEON */
342
343
344			#ifdef LV_HAVE_GENERIC
345
346		2	static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector,
347			const float* aVector,
348			const float* bVector,
349			unsigned int num_points)
350			{
351		2	float* cPtr = cVector;
352		2	const float* aPtr = aVector;
353		2	const float* bPtr = bVector;
354		2	unsigned int number = 0;
355
356	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
357		262142	*cPtr++ = (*aPtr++) * (*bPtr++);
358			}
359		2	}
360			#endif /* LV_HAVE_GENERIC */
361
362
363			#ifdef LV_HAVE_ORC
364			extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector,
365			const float* aVector,
366			const float* bVector,
367			unsigned int num_points);
368
369		2	static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector,
370			const float* aVector,
371			const float* bVector,
372			unsigned int num_points)
373			{
374		2	volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
375		2	}
376			#endif /* LV_HAVE_ORC */
377
378
379			#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */
380