GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_s32f_multiply_32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	81	81	100.0%
Functions:	7	7	100.0%
Branches:	20	20	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_s32f_multiply_32f
    
       *
    
       * \b Overview
    
       *
    
       * Multiplies a floating point vector by a floating point scalar.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_s32f_multiply_32f(float* cVector, const float* aVector, const float
    
       * scalar, unsigned int num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li aVector: The input vector of floats.
    
       * \li scalar: the scalar value to multiply against \p aVector.
    
       * \li num_points: The number of data points.
    
       *
    
       * \b Outputs
    
       * \li cVector: The output vector of floats.
    
       *
    
       * \b Example
    
       * \code
    
       *  int N = 10;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
    
       *   }
    
       *
    
       *   // Normalize by the smallest delta (0.2 in this example)
    
       *   float scale = 5.0f;
    
       *
    
       *   volk_32f_s32f_multiply_32f(out, increasing, scale, N);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       printf("out[%u] = %f\n", ii, out[ii]);
    
       *   }
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
    
      #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
          __m128 aVal, bVal, cVal;
    
      2
          bVal = _mm_set_ps1(scalar);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_loadu_ps(aPtr);
    
      65534
              cVal = _mm_mul_ps(aVal, bVal);
    
              _mm_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * scalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
          __m256 aVal, bVal, cVal;
    
      2
          bVal = _mm256_set1_ps(scalar);
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_loadu_ps(aPtr);
    
      32766
              cVal = _mm256_mul_ps(aVal, bVal);
    
              _mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
    
      32766
              aPtr += 8;
    
      32766
              cPtr += 8;
    
          }
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              *cPtr++ = (*aPtr++) * scalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_RISCV64
    
      extern void volk_32f_s32f_multiply_32f_sifive_u74(float* cVector,
    
                                                        const float* aVector,
    
                                                        const float scalar,
    
                                                        unsigned int num_points);
    
      #endif /* LV_HAVE_RISCV64 */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
    
                                                            const float* aVector,
    
                                                            const float scalar,
    
                                                            unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* inputPtr = aVector;
    
      2
          float* outputPtr = cVector;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *outputPtr = (*inputPtr) * scalar;
    
      262142
              inputPtr++;
    
      262142
              outputPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
    
      #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
    
      #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
    
      #include <inttypes.h>
    
      #include <stdio.h>
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          float* cPtr = cVector;
    
      2
          const float* aPtr = aVector;
    
          __m128 aVal, bVal, cVal;
    
      2
          bVal = _mm_set_ps1(scalar);
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_load_ps(aPtr);
    
      65534
              cVal = _mm_mul_ps(aVal, bVal);
    
              _mm_store_ps(cPtr, cVal); // Store the results back into the C container
    
      65534
              aPtr += 4;
    
      65534
              cPtr += 4;
    
          }
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              *cPtr++ = (*aPtr++) * scalar;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      6
      static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      6
          unsigned int number = 0;
    
      6
          const unsigned int eighthPoints = num_points / 8;
    
      6
          float* cPtr = cVector;
    
      6
          const float* aPtr = aVector;
    
          __m256 aVal, bVal, cVal;
    
      6
          bVal = _mm256_set1_ps(scalar);
    
        2/2✓ Branch 0 taken 98298 times.
✓ Branch 1 taken 6 times.

      98304
          for (; number < eighthPoints; number++) {
    
      98298
              aVal = _mm256_load_ps(aPtr);
    
      98298
              cVal = _mm256_mul_ps(aVal, bVal);
    
              _mm256_store_ps(cPtr, cVal); // Store the results back into the C container
    
      98298
              aPtr += 8;
    
      98298
              cPtr += 8;
    
          }
    
      6
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 42 times.
✓ Branch 1 taken 6 times.

      48
          for (; number < num_points; number++) {
    
      42
              *cPtr++ = (*aPtr++) * scalar;
    
          }
    
      6
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_NEON
    
      #include <arm_neon.h>
    
      static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector,
    
                                                           const float* aVector,
    
                                                           const float scalar,
    
                                                           unsigned int num_points)
    
      {
    
          unsigned int number = 0;
    
          const float* inputPtr = aVector;
    
          float* outputPtr = cVector;
    
          const unsigned int quarterPoints = num_points / 4;
    
          float32x4_t aVal, cVal;
    
          for (number = 0; number < quarterPoints; number++) {
    
              aVal = vld1q_f32(inputPtr);       // Load into NEON regs
    
              cVal = vmulq_n_f32(aVal, scalar); // Do the multiply
    
              vst1q_f32(outputPtr, cVal);       // Store results back to output
    
              inputPtr += 4;
    
              outputPtr += 4;
    
          }
    
          for (number = quarterPoints * 4; number < num_points; number++) {
    
              *outputPtr++ = (*inputPtr++) * scalar;
    
          }
    
      }
    
      #endif /* LV_HAVE_NEON */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector,
    
                                                              const float* aVector,
    
                                                              const float scalar,
    
                                                              unsigned int num_points)
    
      {
    
      2
          unsigned int number = 0;
    
      2
          const float* inputPtr = aVector;
    
      2
          float* outputPtr = cVector;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (number = 0; number < num_points; number++) {
    
      262142
              *outputPtr = (*inputPtr) * scalar;
    
      262142
              inputPtr++;
    
      262142
              outputPtr++;
    
          }
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #ifdef LV_HAVE_ORC
    
      extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
    
                                                        const float* src,
    
                                                        const float scalar,
    
                                                        unsigned int num_points);
    
      2
      static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
    
                                                          const float* aVector,
    
                                                          const float scalar,
    
                                                          unsigned int num_points)
    
      {
    
      2
          volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
    
      2
      }
    
      #endif /* LV_HAVE_ORC */
    
      #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_s32f_multiply_32f
12			*
13			* \b Overview
14			*
15			* Multiplies a floating point vector by a floating point scalar.
16			*
17			* <b>Dispatcher Prototype</b>
18			* \code
19			* void volk_32f_s32f_multiply_32f(float* cVector, const float* aVector, const float
20			* scalar, unsigned int num_points) \endcode
21			*
22			* \b Inputs
23			* \li aVector: The input vector of floats.
24			* \li scalar: the scalar value to multiply against \p aVector.
25			* \li num_points: The number of data points.
26			*
27			* \b Outputs
28			* \li cVector: The output vector of floats.
29			*
30			* \b Example
31			* \code
32			* int N = 10;
33			* unsigned int alignment = volk_get_alignment();
34			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
35			* float* out = (float*)volk_malloc(sizeof(float)*N, alignment);
36			*
37			*
38			* for(unsigned int ii = 0; ii < N; ++ii){
39			* increasing[ii] = 2.f * ((float)ii / (float)N) - 1.f;
40			* }
41			*
42			* // Normalize by the smallest delta (0.2 in this example)
43			* float scale = 5.0f;
44			*
45			* volk_32f_s32f_multiply_32f(out, increasing, scale, N);
46			*
47			* for(unsigned int ii = 0; ii < N; ++ii){
48			* printf("out[%u] = %f\n", ii, out[ii]);
49			* }
50			*
51			* volk_free(increasing);
52			* volk_free(out);
53			* \endcode
54			*/
55
56			#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
57			#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
58
59			#include <inttypes.h>
60			#include <stdio.h>
61
62			#ifdef LV_HAVE_SSE
63			#include <xmmintrin.h>
64
65		2	static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
66			const float* aVector,
67			const float scalar,
68			unsigned int num_points)
69			{
70		2	unsigned int number = 0;
71		2	const unsigned int quarterPoints = num_points / 4;
72
73		2	float* cPtr = cVector;
74		2	const float* aPtr = aVector;
75
76			__m128 aVal, bVal, cVal;
77		2	bVal = _mm_set_ps1(scalar);
78	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
79		65534	aVal = _mm_loadu_ps(aPtr);
80
81		65534	cVal = _mm_mul_ps(aVal, bVal);
82
83			_mm_storeu_ps(cPtr, cVal); // Store the results back into the C container
84
85		65534	aPtr += 4;
86		65534	cPtr += 4;
87			}
88
89		2	number = quarterPoints * 4;
90	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
91		6	*cPtr++ = (*aPtr++) * scalar;
92			}
93		2	}
94			#endif /* LV_HAVE_SSE */
95
96			#ifdef LV_HAVE_AVX
97			#include <immintrin.h>
98
99		2	static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector,
100			const float* aVector,
101			const float scalar,
102			unsigned int num_points)
103			{
104		2	unsigned int number = 0;
105		2	const unsigned int eighthPoints = num_points / 8;
106
107		2	float* cPtr = cVector;
108		2	const float* aPtr = aVector;
109
110			__m256 aVal, bVal, cVal;
111		2	bVal = _mm256_set1_ps(scalar);
112	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
113
114		32766	aVal = _mm256_loadu_ps(aPtr);
115
116		32766	cVal = _mm256_mul_ps(aVal, bVal);
117
118			_mm256_storeu_ps(cPtr, cVal); // Store the results back into the C container
119
120		32766	aPtr += 8;
121		32766	cPtr += 8;
122			}
123
124		2	number = eighthPoints * 8;
125	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
126		14	*cPtr++ = (*aPtr++) * scalar;
127			}
128		2	}
129			#endif /* LV_HAVE_AVX */
130
131			#ifdef LV_HAVE_RISCV64
132			extern void volk_32f_s32f_multiply_32f_sifive_u74(float* cVector,
133			const float* aVector,
134			const float scalar,
135			unsigned int num_points);
136			#endif /* LV_HAVE_RISCV64 */
137
138			#ifdef LV_HAVE_GENERIC
139		2	static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
140			const float* aVector,
141			const float scalar,
142			unsigned int num_points)
143			{
144		2	unsigned int number = 0;
145		2	const float* inputPtr = aVector;
146		2	float* outputPtr = cVector;
147	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
148		262142	*outputPtr = (*inputPtr) * scalar;
149		262142	inputPtr++;
150		262142	outputPtr++;
151			}
152		2	}
153			#endif /* LV_HAVE_GENERIC */
154
155			#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
156
157
158			#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
159			#define INCLUDED_volk_32f_s32f_multiply_32f_a_H
160
161			#include <inttypes.h>
162			#include <stdio.h>
163
164			#ifdef LV_HAVE_SSE
165			#include <xmmintrin.h>
166
167		2	static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
168			const float* aVector,
169			const float scalar,
170			unsigned int num_points)
171			{
172		2	unsigned int number = 0;
173		2	const unsigned int quarterPoints = num_points / 4;
174
175		2	float* cPtr = cVector;
176		2	const float* aPtr = aVector;
177
178			__m128 aVal, bVal, cVal;
179		2	bVal = _mm_set_ps1(scalar);
180	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
181		65534	aVal = _mm_load_ps(aPtr);
182
183		65534	cVal = _mm_mul_ps(aVal, bVal);
184
185			_mm_store_ps(cPtr, cVal); // Store the results back into the C container
186
187		65534	aPtr += 4;
188		65534	cPtr += 4;
189			}
190
191		2	number = quarterPoints * 4;
192	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
193		6	*cPtr++ = (*aPtr++) * scalar;
194			}
195		2	}
196			#endif /* LV_HAVE_SSE */
197
198			#ifdef LV_HAVE_AVX
199			#include <immintrin.h>
200
201		6	static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector,
202			const float* aVector,
203			const float scalar,
204			unsigned int num_points)
205			{
206		6	unsigned int number = 0;
207		6	const unsigned int eighthPoints = num_points / 8;
208
209		6	float* cPtr = cVector;
210		6	const float* aPtr = aVector;
211
212			__m256 aVal, bVal, cVal;
213		6	bVal = _mm256_set1_ps(scalar);
214	2/2 ✓ Branch 0 taken 98298 times. ✓ Branch 1 taken 6 times.	98304	for (; number < eighthPoints; number++) {
215		98298	aVal = _mm256_load_ps(aPtr);
216
217		98298	cVal = _mm256_mul_ps(aVal, bVal);
218
219			_mm256_store_ps(cPtr, cVal); // Store the results back into the C container
220
221		98298	aPtr += 8;
222		98298	cPtr += 8;
223			}
224
225		6	number = eighthPoints * 8;
226	2/2 ✓ Branch 0 taken 42 times. ✓ Branch 1 taken 6 times.	48	for (; number < num_points; number++) {
227		42	*cPtr++ = (*aPtr++) * scalar;
228			}
229		6	}
230			#endif /* LV_HAVE_AVX */
231
232			#ifdef LV_HAVE_NEON
233			#include <arm_neon.h>
234
235			static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector,
236			const float* aVector,
237			const float scalar,
238			unsigned int num_points)
239			{
240			unsigned int number = 0;
241			const float* inputPtr = aVector;
242			float* outputPtr = cVector;
243			const unsigned int quarterPoints = num_points / 4;
244
245			float32x4_t aVal, cVal;
246
247			for (number = 0; number < quarterPoints; number++) {
248			aVal = vld1q_f32(inputPtr); // Load into NEON regs
249			cVal = vmulq_n_f32(aVal, scalar); // Do the multiply
250			vst1q_f32(outputPtr, cVal); // Store results back to output
251			inputPtr += 4;
252			outputPtr += 4;
253			}
254			for (number = quarterPoints * 4; number < num_points; number++) {
255			*outputPtr++ = (*inputPtr++) * scalar;
256			}
257			}
258			#endif /* LV_HAVE_NEON */
259
260
261			#ifdef LV_HAVE_GENERIC
262
263		2	static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector,
264			const float* aVector,
265			const float scalar,
266			unsigned int num_points)
267			{
268		2	unsigned int number = 0;
269		2	const float* inputPtr = aVector;
270		2	float* outputPtr = cVector;
271	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (number = 0; number < num_points; number++) {
272		262142	*outputPtr = (*inputPtr) * scalar;
273		262142	inputPtr++;
274		262142	outputPtr++;
275			}
276		2	}
277			#endif /* LV_HAVE_GENERIC */
278
279
280			#ifdef LV_HAVE_ORC
281
282			extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
283			const float* src,
284			const float scalar,
285			unsigned int num_points);
286
287		2	static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
288			const float* aVector,
289			const float scalar,
290			unsigned int num_points)
291			{
292		2	volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
293		2	}
294
295			#endif /* LV_HAVE_ORC */
296
297			#endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */
298