GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_accumulator_s32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 96 96 100.0%
Functions: 5 5 100.0%
Branches: 18 18 100.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_accumulator_s32f
12 *
13 * \b Overview
14 *
15 * Accumulates the values in the input buffer.
16 *
17 * <b>Dispatcher Prototype</b>
18 * \code
19 * void volk_32f_accumulator_s32f(float* result, const float* inputBuffer, unsigned int
20 * num_points) \endcode
21 *
22 * \b Inputs
23 * \li inputBuffer: The buffer of data to be accumulated.
24 * \li num_points: The number of data points.
25 *
26 * \b Outputs
27 * \li result: The accumulated result.
28 *
29 * \b Example
30 * Calculate the sum of numbers 0 through 99
31 * \code
32 * unsigned int N = 100;
33 * unsigned int alignment = volk_get_alignment();
34 * float* increasing = (float*)volk_malloc(sizeof(float)*N, alignment);
35 * float* out = (float*)volk_malloc(sizeof(float), alignment);
36 *
37 * for(unsigned int ii = 0; ii < N; ++ii){
38 * increasing[ii] = (float)ii;
39 * }
40 *
41 * volk_32f_accumulator_s32f(out, increasing, N);
42 *
43 * printf("sum(0..99) = %1.2f\n", out[0]);
44 *
45 * volk_free(increasing);
46 * volk_free(out);
47 * \endcode
48 */
49
50 #ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
51 #define INCLUDED_volk_32f_accumulator_s32f_a_H
52
53 #include <inttypes.h>
54 #include <volk/volk_common.h>
55
56 #ifdef LV_HAVE_AVX
57 #include <immintrin.h>
58
59 2 static inline void volk_32f_accumulator_s32f_a_avx(float* result,
60 const float* inputBuffer,
61 unsigned int num_points)
62 {
63 2 float returnValue = 0;
64 2 unsigned int number = 0;
65 2 const unsigned int eighthPoints = num_points / 8;
66
67 2 const float* aPtr = inputBuffer;
68 __VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
69
70 2 __m256 accumulator = _mm256_setzero_ps();
71 2 __m256 aVal = _mm256_setzero_ps();
72
73
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
74 32766 aVal = _mm256_load_ps(aPtr);
75 32766 accumulator = _mm256_add_ps(accumulator, aVal);
76 32766 aPtr += 8;
77 }
78
79 _mm256_store_ps(tempBuffer, accumulator);
80
81 2 returnValue = tempBuffer[0];
82 2 returnValue += tempBuffer[1];
83 2 returnValue += tempBuffer[2];
84 2 returnValue += tempBuffer[3];
85 2 returnValue += tempBuffer[4];
86 2 returnValue += tempBuffer[5];
87 2 returnValue += tempBuffer[6];
88 2 returnValue += tempBuffer[7];
89
90 2 number = eighthPoints * 8;
91
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
92 14 returnValue += (*aPtr++);
93 }
94 2 *result = returnValue;
95 2 }
96 #endif /* LV_HAVE_AVX */
97
98
99 #ifdef LV_HAVE_AVX
100 #include <immintrin.h>
101
102 2 static inline void volk_32f_accumulator_s32f_u_avx(float* result,
103 const float* inputBuffer,
104 unsigned int num_points)
105 {
106 2 float returnValue = 0;
107 2 unsigned int number = 0;
108 2 const unsigned int eighthPoints = num_points / 8;
109
110 2 const float* aPtr = inputBuffer;
111 __VOLK_ATTR_ALIGNED(32) float tempBuffer[8];
112
113 2 __m256 accumulator = _mm256_setzero_ps();
114 2 __m256 aVal = _mm256_setzero_ps();
115
116
2/2
✓ Branch 0 taken 32766 times.
✓ Branch 1 taken 2 times.
32768 for (; number < eighthPoints; number++) {
117 32766 aVal = _mm256_loadu_ps(aPtr);
118 32766 accumulator = _mm256_add_ps(accumulator, aVal);
119 32766 aPtr += 8;
120 }
121
122 _mm256_store_ps(tempBuffer, accumulator);
123
124 2 returnValue = tempBuffer[0];
125 2 returnValue += tempBuffer[1];
126 2 returnValue += tempBuffer[2];
127 2 returnValue += tempBuffer[3];
128 2 returnValue += tempBuffer[4];
129 2 returnValue += tempBuffer[5];
130 2 returnValue += tempBuffer[6];
131 2 returnValue += tempBuffer[7];
132
133 2 number = eighthPoints * 8;
134
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (; number < num_points; number++) {
135 14 returnValue += (*aPtr++);
136 }
137 2 *result = returnValue;
138 2 }
139 #endif /* LV_HAVE_AVX */
140
141
142 #ifdef LV_HAVE_SSE
143 #include <xmmintrin.h>
144
145 2 static inline void volk_32f_accumulator_s32f_a_sse(float* result,
146 const float* inputBuffer,
147 unsigned int num_points)
148 {
149 2 float returnValue = 0;
150 2 unsigned int number = 0;
151 2 const unsigned int quarterPoints = num_points / 4;
152
153 2 const float* aPtr = inputBuffer;
154 __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
155
156 2 __m128 accumulator = _mm_setzero_ps();
157 2 __m128 aVal = _mm_setzero_ps();
158
159
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
160 65534 aVal = _mm_load_ps(aPtr);
161 65534 accumulator = _mm_add_ps(accumulator, aVal);
162 65534 aPtr += 4;
163 }
164
165 _mm_store_ps(tempBuffer, accumulator);
166
167 2 returnValue = tempBuffer[0];
168 2 returnValue += tempBuffer[1];
169 2 returnValue += tempBuffer[2];
170 2 returnValue += tempBuffer[3];
171
172 2 number = quarterPoints * 4;
173
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
174 6 returnValue += (*aPtr++);
175 }
176 2 *result = returnValue;
177 2 }
178 #endif /* LV_HAVE_SSE */
179
180
181 #ifdef LV_HAVE_SSE
182 #include <xmmintrin.h>
183
184 2 static inline void volk_32f_accumulator_s32f_u_sse(float* result,
185 const float* inputBuffer,
186 unsigned int num_points)
187 {
188 2 float returnValue = 0;
189 2 unsigned int number = 0;
190 2 const unsigned int quarterPoints = num_points / 4;
191
192 2 const float* aPtr = inputBuffer;
193 __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
194
195 2 __m128 accumulator = _mm_setzero_ps();
196 2 __m128 aVal = _mm_setzero_ps();
197
198
2/2
✓ Branch 0 taken 65534 times.
✓ Branch 1 taken 2 times.
65536 for (; number < quarterPoints; number++) {
199 65534 aVal = _mm_loadu_ps(aPtr);
200 65534 accumulator = _mm_add_ps(accumulator, aVal);
201 65534 aPtr += 4;
202 }
203
204 _mm_store_ps(tempBuffer, accumulator);
205
206 2 returnValue = tempBuffer[0];
207 2 returnValue += tempBuffer[1];
208 2 returnValue += tempBuffer[2];
209 2 returnValue += tempBuffer[3];
210
211 2 number = quarterPoints * 4;
212
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (; number < num_points; number++) {
213 6 returnValue += (*aPtr++);
214 }
215 2 *result = returnValue;
216 2 }
217 #endif /* LV_HAVE_SSE */
218
219 #ifdef LV_HAVE_GENERIC
220 2 static inline void volk_32f_accumulator_s32f_generic(float* result,
221 const float* inputBuffer,
222 unsigned int num_points)
223 {
224 2 const float* aPtr = inputBuffer;
225 2 unsigned int number = 0;
226 2 float returnValue = 0;
227
228
2/2
✓ Branch 0 taken 262142 times.
✓ Branch 1 taken 2 times.
262144 for (; number < num_points; number++) {
229 262142 returnValue += (*aPtr++);
230 }
231 2 *result = returnValue;
232 2 }
233 #endif /* LV_HAVE_GENERIC */
234
235 #endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */
236