GCC Code Coverage Report

Directory:	./
File:	kernels/volk/volk_32f_accumulator_s32f.h
Date:	2023-10-23 23:10:04

	Exec	Total	Coverage
Lines:	96	96	100.0%
Functions:	5	5	100.0%
Branches:	18	18	100.0%

  
      Line
      Branch
      Exec
      Source
    
      /* -*- c++ -*- */
    
      /*
    
       * Copyright 2012, 2014 Free Software Foundation, Inc.
    
       *
    
       * This file is part of VOLK
    
       *
    
       * SPDX-License-Identifier: LGPL-3.0-or-later
    
       */
    
      /*!
    
       * \page volk_32f_accumulator_s32f
    
       *
    
       * \b Overview
    
       *
    
       * Accumulates the values in the input buffer.
    
       *
    
       * <b>Dispatcher Prototype</b>
    
       * \code
    
       * void volk_32f_accumulator_s32f(float* result, const float* inputBuffer, unsigned int
    
       * num_points) \endcode
    
       *
    
       * \b Inputs
    
       * \li inputBuffer The buffer of data to be accumulated
    
       * \li num_points: The number of data points.
    
       *
    
       * \b Outputs
    
       * \li result The accumulated result.
    
       *
    
       * \b Example
    
       * Calculate the sum of numbers  0 through 99
    
       * \code
    
       *   int N = 100;
    
       *   unsigned int alignment = volk_get_alignment();
    
       *   float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
    
       *   float* out = (float*)volk_malloc(sizeof(float), alignment);
    
       *
    
       *   for(unsigned int ii = 0; ii < N; ++ii){
    
       *       increasing[ii] = (float)ii;
    
       *   }
    
       *
    
       *   volk_32f_accumulator_s32f(out, increasing, N);
    
       *
    
       *   printf("sum(1..100) = %1.2f\n", out[0]);
    
       *
    
       *   volk_free(increasing);
    
       *   volk_free(out);
    
       * \endcode
    
       */
    
      #ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
    
      #define INCLUDED_volk_32f_accumulator_s32f_a_H
    
      #include <inttypes.h>
    
      #include <volk/volk_common.h>
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_accumulator_s32f_a_avx(float* result,
    
                                                         const float* inputBuffer,
    
                                                         unsigned int num_points)
    
      {
    
      2
          float returnValue = 0;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* aPtr = inputBuffer;
    
          __VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
    
      2
          __m256 accumulator = _mm256_setzero_ps();
    
      2
          __m256 aVal = _mm256_setzero_ps();
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_load_ps(aPtr);
    
      32766
              accumulator = _mm256_add_ps(accumulator, aVal);
    
      32766
              aPtr += 8;
    
          }
    
          _mm256_store_ps(tempBuffer, accumulator);
    
      2
          returnValue = tempBuffer[0];
    
      2
          returnValue += tempBuffer[1];
    
      2
          returnValue += tempBuffer[2];
    
      2
          returnValue += tempBuffer[3];
    
      2
          returnValue += tempBuffer[4];
    
      2
          returnValue += tempBuffer[5];
    
      2
          returnValue += tempBuffer[6];
    
      2
          returnValue += tempBuffer[7];
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              returnValue += (*aPtr++);
    
          }
    
      2
          *result = returnValue;
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_AVX
    
      #include <immintrin.h>
    
      2
      static inline void volk_32f_accumulator_s32f_u_avx(float* result,
    
                                                         const float* inputBuffer,
    
                                                         unsigned int num_points)
    
      {
    
      2
          float returnValue = 0;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int eighthPoints = num_points / 8;
    
      2
          const float* aPtr = inputBuffer;
    
          __VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
    
      2
          __m256 accumulator = _mm256_setzero_ps();
    
      2
          __m256 aVal = _mm256_setzero_ps();
    
        2/2✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.

      32768
          for (; number < eighthPoints; number++) {
    
      32766
              aVal = _mm256_loadu_ps(aPtr);
    
      32766
              accumulator = _mm256_add_ps(accumulator, aVal);
    
      32766
              aPtr += 8;
    
          }
    
          _mm256_store_ps(tempBuffer, accumulator);
    
      2
          returnValue = tempBuffer[0];
    
      2
          returnValue += tempBuffer[1];
    
      2
          returnValue += tempBuffer[2];
    
      2
          returnValue += tempBuffer[3];
    
      2
          returnValue += tempBuffer[4];
    
      2
          returnValue += tempBuffer[5];
    
      2
          returnValue += tempBuffer[6];
    
      2
          returnValue += tempBuffer[7];
    
      2
          number = eighthPoints * 8;
    
        2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.

      16
          for (; number < num_points; number++) {
    
      14
              returnValue += (*aPtr++);
    
          }
    
      2
          *result = returnValue;
    
      2
      }
    
      #endif /* LV_HAVE_AVX */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_accumulator_s32f_a_sse(float* result,
    
                                                         const float* inputBuffer,
    
                                                         unsigned int num_points)
    
      {
    
      2
          float returnValue = 0;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* aPtr = inputBuffer;
    
          __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
    
      2
          __m128 accumulator = _mm_setzero_ps();
    
      2
          __m128 aVal = _mm_setzero_ps();
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_load_ps(aPtr);
    
      65534
              accumulator = _mm_add_ps(accumulator, aVal);
    
      65534
              aPtr += 4;
    
          }
    
          _mm_store_ps(tempBuffer, accumulator);
    
      2
          returnValue = tempBuffer[0];
    
      2
          returnValue += tempBuffer[1];
    
      2
          returnValue += tempBuffer[2];
    
      2
          returnValue += tempBuffer[3];
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              returnValue += (*aPtr++);
    
          }
    
      2
          *result = returnValue;
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_SSE
    
      #include <xmmintrin.h>
    
      2
      static inline void volk_32f_accumulator_s32f_u_sse(float* result,
    
                                                         const float* inputBuffer,
    
                                                         unsigned int num_points)
    
      {
    
      2
          float returnValue = 0;
    
      2
          unsigned int number = 0;
    
      2
          const unsigned int quarterPoints = num_points / 4;
    
      2
          const float* aPtr = inputBuffer;
    
          __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
    
      2
          __m128 accumulator = _mm_setzero_ps();
    
      2
          __m128 aVal = _mm_setzero_ps();
    
        2/2✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.

      65536
          for (; number < quarterPoints; number++) {
    
      65534
              aVal = _mm_loadu_ps(aPtr);
    
      65534
              accumulator = _mm_add_ps(accumulator, aVal);
    
      65534
              aPtr += 4;
    
          }
    
          _mm_store_ps(tempBuffer, accumulator);
    
      2
          returnValue = tempBuffer[0];
    
      2
          returnValue += tempBuffer[1];
    
      2
          returnValue += tempBuffer[2];
    
      2
          returnValue += tempBuffer[3];
    
      2
          number = quarterPoints * 4;
    
        2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.

      8
          for (; number < num_points; number++) {
    
      6
              returnValue += (*aPtr++);
    
          }
    
      2
          *result = returnValue;
    
      2
      }
    
      #endif /* LV_HAVE_SSE */
    
      #ifdef LV_HAVE_GENERIC
    
      2
      static inline void volk_32f_accumulator_s32f_generic(float* result,
    
                                                           const float* inputBuffer,
    
                                                           unsigned int num_points)
    
      {
    
      2
          const float* aPtr = inputBuffer;
    
      2
          unsigned int number = 0;
    
      2
          float returnValue = 0;
    
        2/2✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.

      262144
          for (; number < num_points; number++) {
    
      262142
              returnValue += (*aPtr++);
    
          }
    
      2
          *result = returnValue;
    
      2
      }
    
      #endif /* LV_HAVE_GENERIC */
    
      #endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */

Line	Branch	Exec	Source
1			/* -*- c++ -*- */
2			/*
3			* Copyright 2012, 2014 Free Software Foundation, Inc.
4			*
5			* This file is part of VOLK
6			*
7			* SPDX-License-Identifier: LGPL-3.0-or-later
8			*/
9
10			/*!
11			* \page volk_32f_accumulator_s32f
12			*
13			* \b Overview
14			*
15			* Accumulates the values in the input buffer.
16			*
17			* <b>Dispatcher Prototype</b>
18			* \code
19			* void volk_32f_accumulator_s32f(float* result, const float* inputBuffer, unsigned int
20			* num_points) \endcode
21			*
22			* \b Inputs
23			* \li inputBuffer The buffer of data to be accumulated
24			* \li num_points: The number of data points.
25			*
26			* \b Outputs
27			* \li result The accumulated result.
28			*
29			* \b Example
30			* Calculate the sum of numbers 0 through 99
31			* \code
32			* int N = 100;
33			* unsigned int alignment = volk_get_alignment();
34			* float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
35			* float* out = (float*)volk_malloc(sizeof(float), alignment);
36			*
37			* for(unsigned int ii = 0; ii < N; ++ii){
38			* increasing[ii] = (float)ii;
39			* }
40			*
41			* volk_32f_accumulator_s32f(out, increasing, N);
42			*
43			* printf("sum(1..100) = %1.2f\n", out[0]);
44			*
45			* volk_free(increasing);
46			* volk_free(out);
47			* \endcode
48			*/
49
50			#ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
51			#define INCLUDED_volk_32f_accumulator_s32f_a_H
52
53			#include <inttypes.h>
54			#include <volk/volk_common.h>
55
56			#ifdef LV_HAVE_AVX
57			#include <immintrin.h>
58
59		2	static inline void volk_32f_accumulator_s32f_a_avx(float* result,
60			const float* inputBuffer,
61			unsigned int num_points)
62			{
63		2	float returnValue = 0;
64		2	unsigned int number = 0;
65		2	const unsigned int eighthPoints = num_points / 8;
66
67		2	const float* aPtr = inputBuffer;
68			__VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
69
70		2	__m256 accumulator = _mm256_setzero_ps();
71		2	__m256 aVal = _mm256_setzero_ps();
72
73	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
74		32766	aVal = _mm256_load_ps(aPtr);
75		32766	accumulator = _mm256_add_ps(accumulator, aVal);
76		32766	aPtr += 8;
77			}
78
79			_mm256_store_ps(tempBuffer, accumulator);
80
81		2	returnValue = tempBuffer[0];
82		2	returnValue += tempBuffer[1];
83		2	returnValue += tempBuffer[2];
84		2	returnValue += tempBuffer[3];
85		2	returnValue += tempBuffer[4];
86		2	returnValue += tempBuffer[5];
87		2	returnValue += tempBuffer[6];
88		2	returnValue += tempBuffer[7];
89
90		2	number = eighthPoints * 8;
91	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
92		14	returnValue += (*aPtr++);
93			}
94		2	*result = returnValue;
95		2	}
96			#endif /* LV_HAVE_AVX */
97
98
99			#ifdef LV_HAVE_AVX
100			#include <immintrin.h>
101
102		2	static inline void volk_32f_accumulator_s32f_u_avx(float* result,
103			const float* inputBuffer,
104			unsigned int num_points)
105			{
106		2	float returnValue = 0;
107		2	unsigned int number = 0;
108		2	const unsigned int eighthPoints = num_points / 8;
109
110		2	const float* aPtr = inputBuffer;
111			__VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
112
113		2	__m256 accumulator = _mm256_setzero_ps();
114		2	__m256 aVal = _mm256_setzero_ps();
115
116	2/2 ✓ Branch 0 taken 32766 times. ✓ Branch 1 taken 2 times.	32768	for (; number < eighthPoints; number++) {
117		32766	aVal = _mm256_loadu_ps(aPtr);
118		32766	accumulator = _mm256_add_ps(accumulator, aVal);
119		32766	aPtr += 8;
120			}
121
122			_mm256_store_ps(tempBuffer, accumulator);
123
124		2	returnValue = tempBuffer[0];
125		2	returnValue += tempBuffer[1];
126		2	returnValue += tempBuffer[2];
127		2	returnValue += tempBuffer[3];
128		2	returnValue += tempBuffer[4];
129		2	returnValue += tempBuffer[5];
130		2	returnValue += tempBuffer[6];
131		2	returnValue += tempBuffer[7];
132
133		2	number = eighthPoints * 8;
134	2/2 ✓ Branch 0 taken 14 times. ✓ Branch 1 taken 2 times.	16	for (; number < num_points; number++) {
135		14	returnValue += (*aPtr++);
136			}
137		2	*result = returnValue;
138		2	}
139			#endif /* LV_HAVE_AVX */
140
141
142			#ifdef LV_HAVE_SSE
143			#include <xmmintrin.h>
144
145		2	static inline void volk_32f_accumulator_s32f_a_sse(float* result,
146			const float* inputBuffer,
147			unsigned int num_points)
148			{
149		2	float returnValue = 0;
150		2	unsigned int number = 0;
151		2	const unsigned int quarterPoints = num_points / 4;
152
153		2	const float* aPtr = inputBuffer;
154			__VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
155
156		2	__m128 accumulator = _mm_setzero_ps();
157		2	__m128 aVal = _mm_setzero_ps();
158
159	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
160		65534	aVal = _mm_load_ps(aPtr);
161		65534	accumulator = _mm_add_ps(accumulator, aVal);
162		65534	aPtr += 4;
163			}
164
165			_mm_store_ps(tempBuffer, accumulator);
166
167		2	returnValue = tempBuffer[0];
168		2	returnValue += tempBuffer[1];
169		2	returnValue += tempBuffer[2];
170		2	returnValue += tempBuffer[3];
171
172		2	number = quarterPoints * 4;
173	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
174		6	returnValue += (*aPtr++);
175			}
176		2	*result = returnValue;
177		2	}
178			#endif /* LV_HAVE_SSE */
179
180
181			#ifdef LV_HAVE_SSE
182			#include <xmmintrin.h>
183
184		2	static inline void volk_32f_accumulator_s32f_u_sse(float* result,
185			const float* inputBuffer,
186			unsigned int num_points)
187			{
188		2	float returnValue = 0;
189		2	unsigned int number = 0;
190		2	const unsigned int quarterPoints = num_points / 4;
191
192		2	const float* aPtr = inputBuffer;
193			__VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
194
195		2	__m128 accumulator = _mm_setzero_ps();
196		2	__m128 aVal = _mm_setzero_ps();
197
198	2/2 ✓ Branch 0 taken 65534 times. ✓ Branch 1 taken 2 times.	65536	for (; number < quarterPoints; number++) {
199		65534	aVal = _mm_loadu_ps(aPtr);
200		65534	accumulator = _mm_add_ps(accumulator, aVal);
201		65534	aPtr += 4;
202			}
203
204			_mm_store_ps(tempBuffer, accumulator);
205
206		2	returnValue = tempBuffer[0];
207		2	returnValue += tempBuffer[1];
208		2	returnValue += tempBuffer[2];
209		2	returnValue += tempBuffer[3];
210
211		2	number = quarterPoints * 4;
212	2/2 ✓ Branch 0 taken 6 times. ✓ Branch 1 taken 2 times.	8	for (; number < num_points; number++) {
213		6	returnValue += (*aPtr++);
214			}
215		2	*result = returnValue;
216		2	}
217			#endif /* LV_HAVE_SSE */
218
219			#ifdef LV_HAVE_GENERIC
220		2	static inline void volk_32f_accumulator_s32f_generic(float* result,
221			const float* inputBuffer,
222			unsigned int num_points)
223			{
224		2	const float* aPtr = inputBuffer;
225		2	unsigned int number = 0;
226		2	float returnValue = 0;
227
228	2/2 ✓ Branch 0 taken 262142 times. ✓ Branch 1 taken 2 times.	262144	for (; number < num_points; number++) {
229		262142	returnValue += (*aPtr++);
230			}
231		2	*result = returnValue;
232		2	}
233			#endif /* LV_HAVE_GENERIC */
234
235			#endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */
236