GCC Code Coverage Report


Directory: ./
File: kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 158 170 92.9%
Functions: 4 4 100.0%
Branches: 60 72 83.3%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*!
11 * \page volk_32f_s32f_32f_fm_detect_32f
12 *
13 * \b Overview
14 *
15 * Performs FM-detect differentiation on the input vector and stores
16 * the results in the output vector.
17 *
18 * <b>Dispatcher Prototype</b>
19 * \code
20 * void volk_32f_s32f_32f_fm_detect_32f(float* outputVector, const float* inputVector,
21 * const float bound, float* saveValue, unsigned int num_points) \endcode
22 *
23 * \b Inputs
24 * \li inputVector: The input vector containing phase data (must be on the interval
25 * (-bound, bound]).
26 * \li bound: Half-width of the interval the input phase data lies in; each difference is wrapped once by 2 * bound when it falls outside [-bound, bound].
27 * \li saveValue: A pointer to a float which contains the phase value of the sample before the first input sample; on return it holds the last input sample.
28 * \li num_points: The number of data points.
29 *
30 * \b Outputs
31 * \li outputVector: The vector where the results will be stored.
32 *
33 * \b Example
34 * \code
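35 * int N = 10000;
36 * float* in = (float*)volk_malloc(sizeof(float) * N, volk_get_alignment());
37 * float* out = (float*)volk_malloc(sizeof(float) * N, volk_get_alignment());
38 * float save = 0.f; // phase preceding in[0]; fill in[] with phase on (-M_PI, M_PI] first
39 * volk_32f_s32f_32f_fm_detect_32f(out, in, (float)M_PI, &save, N);
40 * volk_free(in); volk_free(out);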
41 * \endcode
42 */
43
44 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
45 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
46
47 #include <inttypes.h>
48 #include <stdio.h>
49
50 #ifdef LV_HAVE_AVX
51 #include <immintrin.h>
52
/*!
 * Aligned AVX kernel: first difference of a phase sequence, wrapped by bound.
 *
 * Writes outputVector[i] = inputVector[i] - inputVector[i - 1] for every
 * i < num_points, using *saveValue as the predecessor of inputVector[0].
 * A difference greater than bound has 2 * bound subtracted once; a difference
 * less than -bound has 2 * bound added once. On exit *saveValue holds
 * inputVector[num_points - 1]. When num_points is 0 the function returns
 * without touching outputVector or *saveValue.
 *
 * The vector loop applies _mm256_load_ps / _mm256_store_ps at multiples of
 * 8 floats from inputVector / outputVector, so both buffers are presumably
 * expected to be 32-byte aligned by the caller (TODO confirm against the
 * dispatcher).
 */
53 2 static inline void volk_32f_s32f_32f_fm_detect_32f_a_avx(float* outputVector,
54 const float* inputVector,
55 const float bound,
56 float* saveValue,
57 unsigned int num_points)
58 {
59
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (num_points < 1) {
60 return;
61 }
// number indexes 8-float blocks and starts at 1: block 0 is produced by the scalar code below.
62 2 unsigned int number = 1;
63 2 unsigned int j = 0;
64 // num_points-1 keeps Fedora 7's gcc from crashing...
65 // num_points won't work. :(
66 2 const unsigned int eighthPoints = (num_points - 1) / 8;
67
68 2 float* outPtr = outputVector;
69 2 const float* inPtr = inputVector;
70 2 __m256 upperBound = _mm256_set1_ps(bound);
71 2 __m256 lowerBound = _mm256_set1_ps(-bound);
72 __m256 next3old1;
73 __m256 next4;
74 __m256 boundAdjust;
75 2 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
76 2 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below.
77 // Do the first 8 by hand since we're going in from the saveValue:
78 2 *outPtr = *inPtr - *saveValue;
79
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr > bound)
80 *outPtr -= 2 * bound;
81
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr < -bound)
82 *outPtr += 2 * bound;
83 2 inPtr++;
84 2 outPtr++;
// Scalar pass over samples 1 .. min(8, num_points) - 1.
85
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
86 14 *outPtr = *(inPtr) - *(inPtr - 1);
87
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr > bound)
88 1 *outPtr -= 2 * bound;
89
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr < -bound)
90 1 *outPtr += 2 * bound;
91 14 inPtr++;
92 14 outPtr++;
93 }
94
// Vector pass: runs eighthPoints - 1 times (none when eighthPoints is 0 or 1), 8 outputs per iteration.
95
2/2
✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
32766 for (; number < eighthPoints; number++) {
96 // Load data
// next3old1 receives the 8 predecessors (it starts one float before inPtr, hence the unaligned load);
// next4 receives the 8 current samples via an aligned load.
97 65528 next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
98 32764 next4 = _mm256_load_ps(inPtr);
99 32764 inPtr += 8;
100 // Subtract and store:
101 32764 next3old1 = _mm256_sub_ps(next4, next3old1);
102 // Bound:
// Each compare produces a per-lane mask; ANDing it with the adjust constant leaves the
// correction only in lanes that crossed the bound.
103 32764 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
104 32764 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
105 32764 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
106 32764 next4 = _mm256_and_ps(next4, negBoundAdjust);
107 32764 boundAdjust = _mm256_or_ps(next4, boundAdjust);
108 // Make sure we're in the bounding interval:
109 32764 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
110 _mm256_store_ps(outPtr, next3old1); // Store the results back into the output
111 32764 outPtr += 8;
112 }
113
// Scalar tail: number restarts as a sample index at 8 * eighthPoints (8 when eighthPoints is 0)
// and runs to num_points.
114
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
115 14 number++) {
116 14 *outPtr = *(inPtr) - *(inPtr - 1);
117
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
14 if (*outPtr > bound)
118 4 *outPtr -= 2 * bound;
119
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr < -bound)
120 1 *outPtr += 2 * bound;
121 14 inPtr++;
122 14 outPtr++;
123 }
124
// Hand the last input sample to the next call as its predecessor value.
125 2 *saveValue = inputVector[num_points - 1];
126 }
127 #endif /* LV_HAVE_AVX */
128
129
130 #ifdef LV_HAVE_SSE
131 #include <xmmintrin.h>
132
/*!
 * Aligned SSE kernel: first difference of a phase sequence, wrapped by bound.
 *
 * Writes outputVector[i] = inputVector[i] - inputVector[i - 1] for every
 * i < num_points, using *saveValue as the predecessor of inputVector[0].
 * A difference greater than bound has 2 * bound subtracted once; a difference
 * less than -bound has 2 * bound added once. On exit *saveValue holds
 * inputVector[num_points - 1]. When num_points is 0 the function returns
 * without touching outputVector or *saveValue.
 *
 * The vector loop applies _mm_load_ps / _mm_store_ps at multiples of 4 floats
 * from inputVector / outputVector, so both buffers are presumably expected to
 * be 16-byte aligned by the caller (TODO confirm against the dispatcher).
 */
133 2 static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector,
134 const float* inputVector,
135 const float bound,
136 float* saveValue,
137 unsigned int num_points)
138 {
139
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (num_points < 1) {
140 return;
141 }
// number indexes 4-float blocks and starts at 1: block 0 is produced by the scalar code below.
142 2 unsigned int number = 1;
143 2 unsigned int j = 0;
144 // num_points-1 keeps Fedora 7's gcc from crashing...
145 // num_points won't work. :(
146 2 const unsigned int quarterPoints = (num_points - 1) / 4;
147
148 2 float* outPtr = outputVector;
149 2 const float* inPtr = inputVector;
150 2 __m128 upperBound = _mm_set_ps1(bound);
151 2 __m128 lowerBound = _mm_set_ps1(-bound);
152 __m128 next3old1;
153 __m128 next4;
154 __m128 boundAdjust;
155 2 __m128 posBoundAdjust = _mm_set_ps1(-2 * bound); // Subtract when we're above.
156 2 __m128 negBoundAdjust = _mm_set_ps1(2 * bound); // Add when we're below.
157 // Do the first 4 by hand since we're going in from the saveValue:
158 2 *outPtr = *inPtr - *saveValue;
159
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr > bound)
160 *outPtr -= 2 * bound;
161
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr < -bound)
162 *outPtr += 2 * bound;
163 2 inPtr++;
164 2 outPtr++;
// Scalar pass over samples 1 .. min(4, num_points) - 1.
165
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) {
166 6 *outPtr = *(inPtr) - *(inPtr - 1);
167
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 if (*outPtr > bound)
168 1 *outPtr -= 2 * bound;
169
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 if (*outPtr < -bound)
170 1 *outPtr += 2 * bound;
171 6 inPtr++;
172 6 outPtr++;
173 }
174
// Vector pass: runs quarterPoints - 1 times (none when quarterPoints is 0 or 1), 4 outputs per iteration.
175
2/2
✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 2 times.
65534 for (; number < quarterPoints; number++) {
176 // Load data
// next3old1 receives the 4 predecessors (it starts one float before inPtr, hence the unaligned load);
// next4 receives the 4 current samples via an aligned load.
177 131064 next3old1 = _mm_loadu_ps((float*)(inPtr - 1));
178 65532 next4 = _mm_load_ps(inPtr);
179 65532 inPtr += 4;
180 // Subtract and store:
181 65532 next3old1 = _mm_sub_ps(next4, next3old1);
182 // Bound:
// Each compare produces a per-lane mask; ANDing it with the adjust constant leaves the
// correction only in lanes that crossed the bound.
183 65532 boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
184 65532 boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
185 65532 next4 = _mm_cmplt_ps(next3old1, lowerBound);
186 65532 next4 = _mm_and_ps(next4, negBoundAdjust);
187 65532 boundAdjust = _mm_or_ps(next4, boundAdjust);
188 // Make sure we're in the bounding interval:
189 65532 next3old1 = _mm_add_ps(next3old1, boundAdjust);
190 _mm_store_ps(outPtr, next3old1); // Store the results back into the output
191 65532 outPtr += 4;
192 }
193
// Scalar tail: number restarts as a sample index at 4 * quarterPoints (4 when quarterPoints is 0)
// and runs to num_points.
194 2 for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints));
195
2/2
✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
8 number < num_points;
196 6 number++) {
197 6 *outPtr = *(inPtr) - *(inPtr - 1);
198
2/2
✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
6 if (*outPtr > bound)
199 2 *outPtr -= 2 * bound;
200
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
6 if (*outPtr < -bound)
201 1 *outPtr += 2 * bound;
202 6 inPtr++;
203 6 outPtr++;
204 }
205
// Hand the last input sample to the next call as its predecessor value.
206 2 *saveValue = inputVector[num_points - 1];
207 }
208 #endif /* LV_HAVE_SSE */
209
210 #ifdef LV_HAVE_GENERIC
211
/*!
 * Portable scalar kernel: first difference of a phase sequence, wrapped by bound.
 *
 * Writes outputVector[i] = inputVector[i] - inputVector[i - 1] for every
 * i < num_points, using *saveValue as the predecessor of inputVector[0].
 * A difference greater than bound has 2 * bound subtracted once; a difference
 * less than -bound has 2 * bound added once. On exit *saveValue holds
 * inputVector[num_points - 1]. When num_points is 0 the function returns
 * without touching outputVector or *saveValue.
 */
212 2 static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector,
213 const float* inputVector,
214 const float bound,
215 float* saveValue,
216 unsigned int num_points)
217 {
218
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (num_points < 1) {
219 return;
220 }
221 2 unsigned int number = 0;
222 2 float* outPtr = outputVector;
223 2 const float* inPtr = inputVector;
224
225 // Do the first 1 by hand since we're going in from the saveValue:
226 2 *outPtr = *inPtr - *saveValue;
227
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr > bound)
228 *outPtr -= 2 * bound;
229
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr < -bound)
230 *outPtr += 2 * bound;
231 2 inPtr++;
232 2 outPtr++;
233
// Every remaining sample is differenced against its predecessor inside inputVector.
234
2/2
✓ Branch 0 taken 262140 times.
✓ Branch 1 taken 2 times.
262142 for (number = 1; number < num_points; number++) {
235 262140 *outPtr = *(inPtr) - *(inPtr - 1);
236
2/2
✓ Branch 0 taken 32572 times.
✓ Branch 1 taken 229568 times.
262140 if (*outPtr > bound)
237 32572 *outPtr -= 2 * bound;
238
2/2
✓ Branch 0 taken 32879 times.
✓ Branch 1 taken 229261 times.
262140 if (*outPtr < -bound)
239 32879 *outPtr += 2 * bound;
240 262140 inPtr++;
241 262140 outPtr++;
242 }
243
// Hand the last input sample to the next call as its predecessor value.
244 2 *saveValue = inputVector[num_points - 1];
245 }
246 #endif /* LV_HAVE_GENERIC */
247
248
249 #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */
250
251
252 #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
253 #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H
254
255 #include <inttypes.h>
256 #include <stdio.h>
257
258 #ifdef LV_HAVE_AVX
259 #include <immintrin.h>
260
/*!
 * Unaligned AVX kernel: first difference of a phase sequence, wrapped by bound.
 *
 * Writes outputVector[i] = inputVector[i] - inputVector[i - 1] for every
 * i < num_points, using *saveValue as the predecessor of inputVector[0].
 * A difference greater than bound has 2 * bound subtracted once; a difference
 * less than -bound has 2 * bound added once. On exit *saveValue holds
 * inputVector[num_points - 1]. When num_points is 0 the function returns
 * without touching outputVector or *saveValue.
 *
 * All vector memory accesses use _mm256_loadu_ps / _mm256_storeu_ps, so the
 * vector loop places no alignment requirement on either buffer.
 */
261 2 static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector,
262 const float* inputVector,
263 const float bound,
264 float* saveValue,
265 unsigned int num_points)
266 {
267
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (num_points < 1) {
268 return;
269 }
// number indexes 8-float blocks and starts at 1: block 0 is produced by the scalar code below.
270 2 unsigned int number = 1;
271 2 unsigned int j = 0;
272 // num_points-1 keeps Fedora 7's gcc from crashing...
273 // num_points won't work. :(
274 2 const unsigned int eighthPoints = (num_points - 1) / 8;
275
276 2 float* outPtr = outputVector;
277 2 const float* inPtr = inputVector;
278 2 __m256 upperBound = _mm256_set1_ps(bound);
279 2 __m256 lowerBound = _mm256_set1_ps(-bound);
280 __m256 next3old1;
281 __m256 next4;
282 __m256 boundAdjust;
283 2 __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above.
284 2 __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below.
285 // Do the first 8 by hand since we're going in from the saveValue:
286 2 *outPtr = *inPtr - *saveValue;
287
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr > bound)
288 *outPtr -= 2 * bound;
289
1/2
✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
2 if (*outPtr < -bound)
290 *outPtr += 2 * bound;
291 2 inPtr++;
292 2 outPtr++;
// Scalar pass over samples 1 .. min(8, num_points) - 1.
293
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) {
294 14 *outPtr = *(inPtr) - *(inPtr - 1);
295
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr > bound)
296 1 *outPtr -= 2 * bound;
297
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr < -bound)
298 1 *outPtr += 2 * bound;
299 14 inPtr++;
300 14 outPtr++;
301 }
302
// Vector pass: runs eighthPoints - 1 times (none when eighthPoints is 0 or 1), 8 outputs per iteration.
303
2/2
✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
32766 for (; number < eighthPoints; number++) {
304 // Load data
// next3old1 receives the 8 predecessors (starting one float before inPtr);
// next4 receives the 8 current samples. Both loads are unaligned.
305 65528 next3old1 = _mm256_loadu_ps((float*)(inPtr - 1));
306 32764 next4 = _mm256_loadu_ps(inPtr);
307 32764 inPtr += 8;
308 // Subtract and store:
309 32764 next3old1 = _mm256_sub_ps(next4, next3old1);
310 // Bound:
// Each compare produces a per-lane mask; ANDing it with the adjust constant leaves the
// correction only in lanes that crossed the bound.
311 32764 boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS);
312 32764 boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust);
313 32764 next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS);
314 32764 next4 = _mm256_and_ps(next4, negBoundAdjust);
315 32764 boundAdjust = _mm256_or_ps(next4, boundAdjust);
316 // Make sure we're in the bounding interval:
317 32764 next3old1 = _mm256_add_ps(next3old1, boundAdjust);
318 _mm256_storeu_ps(outPtr, next3old1); // Store the results back into the output
319 32764 outPtr += 8;
320 }
321
// Scalar tail: number restarts as a sample index at 8 * eighthPoints (8 when eighthPoints is 0)
// and runs to num_points.
322
2/2
✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
16 for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points;
323 14 number++) {
324 14 *outPtr = *(inPtr) - *(inPtr - 1);
325
2/2
✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
14 if (*outPtr > bound)
326 4 *outPtr -= 2 * bound;
327
2/2
✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
14 if (*outPtr < -bound)
328 1 *outPtr += 2 * bound;
329 14 inPtr++;
330 14 outPtr++;
331 }
332
// Hand the last input sample to the next call as its predecessor value.
333 2 *saveValue = inputVector[num_points - 1];
334 }
335 #endif /* LV_HAVE_AVX */
336
337
338 #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */
339