Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | /*! | ||
11 | * \page volk_32f_s32f_32f_fm_detect_32f | ||
12 | * | ||
13 | * \b Overview | ||
14 | * | ||
15 | * Performs FM-detect differentiation on the input vector and stores | ||
16 | * the results in the output vector. | ||
17 | * | ||
18 | * <b>Dispatcher Prototype</b> | ||
19 | * \code | ||
20 | * void volk_32f_s32f_32f_fm_detect_32f(float* outputVector, const float* inputVector, | ||
21 | * const float bound, float* saveValue, unsigned int num_points) \endcode | ||
22 | * | ||
23 | * \b Inputs | ||
24 | * \li inputVector: The input vector containing phase data (must be on the interval | ||
25 | * (-bound, bound]). \li bound: The interval that the input phase data is in, which is | ||
26 | * used to modulo the differentiation. \li saveValue: A pointer to a float which contains | ||
27 | * the phase value of the sample before the first input sample. \li num_points The number | ||
28 | * of data points. | ||
29 | * | ||
30 | * \b Outputs | ||
31 | * \li outputVector: The vector where the results will be stored. | ||
32 | * | ||
33 | * \b Example | ||
34 | * \code | ||
35 | * int N = 10000; | ||
36 | * | ||
37 | * <FIXME> | ||
38 | * | ||
39 | * volk_32f_s32f_32f_fm_detect_32f(); | ||
40 | * | ||
41 | * \endcode | ||
42 | */ | ||
43 | |||
44 | #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H | ||
45 | #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H | ||
46 | |||
47 | #include <inttypes.h> | ||
48 | #include <stdio.h> | ||
49 | |||
50 | #ifdef LV_HAVE_AVX | ||
51 | #include <immintrin.h> | ||
52 | |||
53 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_a_avx(float* outputVector, | |
54 | const float* inputVector, | ||
55 | const float bound, | ||
56 | float* saveValue, | ||
57 | unsigned int num_points) | ||
58 | { | ||
59 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
60 | ✗ | return; | |
61 | } | ||
62 | 2 | unsigned int number = 1; | |
63 | 2 | unsigned int j = 0; | |
64 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
65 | // num_points won't work. :( | ||
66 | 2 | const unsigned int eighthPoints = (num_points - 1) / 8; | |
67 | |||
68 | 2 | float* outPtr = outputVector; | |
69 | 2 | const float* inPtr = inputVector; | |
70 | 2 | __m256 upperBound = _mm256_set1_ps(bound); | |
71 | 2 | __m256 lowerBound = _mm256_set1_ps(-bound); | |
72 | __m256 next3old1; | ||
73 | __m256 next4; | ||
74 | __m256 boundAdjust; | ||
75 | 2 | __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above. | |
76 | 2 | __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below. | |
77 | // Do the first 8 by hand since we're going in from the saveValue: | ||
78 | 2 | *outPtr = *inPtr - *saveValue; | |
79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
80 | ✗ | *outPtr -= 2 * bound; | |
81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
82 | ✗ | *outPtr += 2 * bound; | |
83 | 2 | inPtr++; | |
84 | 2 | outPtr++; | |
85 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) { |
86 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
87 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr > bound) |
88 | 1 | *outPtr -= 2 * bound; | |
89 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
90 | 1 | *outPtr += 2 * bound; | |
91 | 14 | inPtr++; | |
92 | 14 | outPtr++; | |
93 | } | ||
94 | |||
95 |
2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
|
32766 | for (; number < eighthPoints; number++) { |
96 | // Load data | ||
97 | 65528 | next3old1 = _mm256_loadu_ps((float*)(inPtr - 1)); | |
98 | 32764 | next4 = _mm256_load_ps(inPtr); | |
99 | 32764 | inPtr += 8; | |
100 | // Subtract and store: | ||
101 | 32764 | next3old1 = _mm256_sub_ps(next4, next3old1); | |
102 | // Bound: | ||
103 | 32764 | boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS); | |
104 | 32764 | boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust); | |
105 | 32764 | next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS); | |
106 | 32764 | next4 = _mm256_and_ps(next4, negBoundAdjust); | |
107 | 32764 | boundAdjust = _mm256_or_ps(next4, boundAdjust); | |
108 | // Make sure we're in the bounding interval: | ||
109 | 32764 | next3old1 = _mm256_add_ps(next3old1, boundAdjust); | |
110 | _mm256_store_ps(outPtr, next3old1); // Store the results back into the output | ||
111 | 32764 | outPtr += 8; | |
112 | } | ||
113 | |||
114 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points; |
115 | 14 | number++) { | |
116 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
117 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | if (*outPtr > bound) |
118 | 4 | *outPtr -= 2 * bound; | |
119 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
120 | 1 | *outPtr += 2 * bound; | |
121 | 14 | inPtr++; | |
122 | 14 | outPtr++; | |
123 | } | ||
124 | |||
125 | 2 | *saveValue = inputVector[num_points - 1]; | |
126 | } | ||
127 | #endif /* LV_HAVE_AVX */ | ||
128 | |||
129 | |||
130 | #ifdef LV_HAVE_SSE | ||
131 | #include <xmmintrin.h> | ||
132 | |||
133 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, | |
134 | const float* inputVector, | ||
135 | const float bound, | ||
136 | float* saveValue, | ||
137 | unsigned int num_points) | ||
138 | { | ||
139 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
140 | ✗ | return; | |
141 | } | ||
142 | 2 | unsigned int number = 1; | |
143 | 2 | unsigned int j = 0; | |
144 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
145 | // num_points won't work. :( | ||
146 | 2 | const unsigned int quarterPoints = (num_points - 1) / 4; | |
147 | |||
148 | 2 | float* outPtr = outputVector; | |
149 | 2 | const float* inPtr = inputVector; | |
150 | 2 | __m128 upperBound = _mm_set_ps1(bound); | |
151 | 2 | __m128 lowerBound = _mm_set_ps1(-bound); | |
152 | __m128 next3old1; | ||
153 | __m128 next4; | ||
154 | __m128 boundAdjust; | ||
155 | 2 | __m128 posBoundAdjust = _mm_set_ps1(-2 * bound); // Subtract when we're above. | |
156 | 2 | __m128 negBoundAdjust = _mm_set_ps1(2 * bound); // Add when we're below. | |
157 | // Do the first 4 by hand since we're going in from the saveValue: | ||
158 | 2 | *outPtr = *inPtr - *saveValue; | |
159 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
160 | ✗ | *outPtr -= 2 * bound; | |
161 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
162 | ✗ | *outPtr += 2 * bound; | |
163 | 2 | inPtr++; | |
164 | 2 | outPtr++; | |
165 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) { |
166 | 6 | *outPtr = *(inPtr) - *(inPtr - 1); | |
167 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr > bound) |
168 | 1 | *outPtr -= 2 * bound; | |
169 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr < -bound) |
170 | 1 | *outPtr += 2 * bound; | |
171 | 6 | inPtr++; | |
172 | 6 | outPtr++; | |
173 | } | ||
174 | |||
175 |
2/2✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 2 times.
|
65534 | for (; number < quarterPoints; number++) { |
176 | // Load data | ||
177 | 131064 | next3old1 = _mm_loadu_ps((float*)(inPtr - 1)); | |
178 | 65532 | next4 = _mm_load_ps(inPtr); | |
179 | 65532 | inPtr += 4; | |
180 | // Subtract and store: | ||
181 | 65532 | next3old1 = _mm_sub_ps(next4, next3old1); | |
182 | // Bound: | ||
183 | 65532 | boundAdjust = _mm_cmpgt_ps(next3old1, upperBound); | |
184 | 65532 | boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust); | |
185 | 65532 | next4 = _mm_cmplt_ps(next3old1, lowerBound); | |
186 | 65532 | next4 = _mm_and_ps(next4, negBoundAdjust); | |
187 | 65532 | boundAdjust = _mm_or_ps(next4, boundAdjust); | |
188 | // Make sure we're in the bounding interval: | ||
189 | 65532 | next3old1 = _mm_add_ps(next3old1, boundAdjust); | |
190 | _mm_store_ps(outPtr, next3old1); // Store the results back into the output | ||
191 | 65532 | outPtr += 4; | |
192 | } | ||
193 | |||
194 | 2 | for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints)); | |
195 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | number < num_points; |
196 | 6 | number++) { | |
197 | 6 | *outPtr = *(inPtr) - *(inPtr - 1); | |
198 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (*outPtr > bound) |
199 | 2 | *outPtr -= 2 * bound; | |
200 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr < -bound) |
201 | 1 | *outPtr += 2 * bound; | |
202 | 6 | inPtr++; | |
203 | 6 | outPtr++; | |
204 | } | ||
205 | |||
206 | 2 | *saveValue = inputVector[num_points - 1]; | |
207 | } | ||
208 | #endif /* LV_HAVE_SSE */ | ||
209 | |||
210 | #ifdef LV_HAVE_GENERIC | ||
211 | |||
212 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector, | |
213 | const float* inputVector, | ||
214 | const float bound, | ||
215 | float* saveValue, | ||
216 | unsigned int num_points) | ||
217 | { | ||
218 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
219 | ✗ | return; | |
220 | } | ||
221 | 2 | unsigned int number = 0; | |
222 | 2 | float* outPtr = outputVector; | |
223 | 2 | const float* inPtr = inputVector; | |
224 | |||
225 | // Do the first 1 by hand since we're going in from the saveValue: | ||
226 | 2 | *outPtr = *inPtr - *saveValue; | |
227 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
228 | ✗ | *outPtr -= 2 * bound; | |
229 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
230 | ✗ | *outPtr += 2 * bound; | |
231 | 2 | inPtr++; | |
232 | 2 | outPtr++; | |
233 | |||
234 |
2/2✓ Branch 0 taken 262140 times.
✓ Branch 1 taken 2 times.
|
262142 | for (number = 1; number < num_points; number++) { |
235 | 262140 | *outPtr = *(inPtr) - *(inPtr - 1); | |
236 |
2/2✓ Branch 0 taken 32572 times.
✓ Branch 1 taken 229568 times.
|
262140 | if (*outPtr > bound) |
237 | 32572 | *outPtr -= 2 * bound; | |
238 |
2/2✓ Branch 0 taken 32879 times.
✓ Branch 1 taken 229261 times.
|
262140 | if (*outPtr < -bound) |
239 | 32879 | *outPtr += 2 * bound; | |
240 | 262140 | inPtr++; | |
241 | 262140 | outPtr++; | |
242 | } | ||
243 | |||
244 | 2 | *saveValue = inputVector[num_points - 1]; | |
245 | } | ||
246 | #endif /* LV_HAVE_GENERIC */ | ||
247 | |||
248 | |||
249 | #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */ | ||
250 | |||
251 | |||
252 | #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H | ||
253 | #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H | ||
254 | |||
255 | #include <inttypes.h> | ||
256 | #include <stdio.h> | ||
257 | |||
258 | #ifdef LV_HAVE_AVX | ||
259 | #include <immintrin.h> | ||
260 | |||
261 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector, | |
262 | const float* inputVector, | ||
263 | const float bound, | ||
264 | float* saveValue, | ||
265 | unsigned int num_points) | ||
266 | { | ||
267 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
268 | ✗ | return; | |
269 | } | ||
270 | 2 | unsigned int number = 1; | |
271 | 2 | unsigned int j = 0; | |
272 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
273 | // num_points won't work. :( | ||
274 | 2 | const unsigned int eighthPoints = (num_points - 1) / 8; | |
275 | |||
276 | 2 | float* outPtr = outputVector; | |
277 | 2 | const float* inPtr = inputVector; | |
278 | 2 | __m256 upperBound = _mm256_set1_ps(bound); | |
279 | 2 | __m256 lowerBound = _mm256_set1_ps(-bound); | |
280 | __m256 next3old1; | ||
281 | __m256 next4; | ||
282 | __m256 boundAdjust; | ||
283 | 2 | __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above. | |
284 | 2 | __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below. | |
285 | // Do the first 8 by hand since we're going in from the saveValue: | ||
286 | 2 | *outPtr = *inPtr - *saveValue; | |
287 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
288 | ✗ | *outPtr -= 2 * bound; | |
289 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
290 | ✗ | *outPtr += 2 * bound; | |
291 | 2 | inPtr++; | |
292 | 2 | outPtr++; | |
293 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) { |
294 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
295 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr > bound) |
296 | 1 | *outPtr -= 2 * bound; | |
297 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
298 | 1 | *outPtr += 2 * bound; | |
299 | 14 | inPtr++; | |
300 | 14 | outPtr++; | |
301 | } | ||
302 | |||
303 |
2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
|
32766 | for (; number < eighthPoints; number++) { |
304 | // Load data | ||
305 | 65528 | next3old1 = _mm256_loadu_ps((float*)(inPtr - 1)); | |
306 | 32764 | next4 = _mm256_loadu_ps(inPtr); | |
307 | 32764 | inPtr += 8; | |
308 | // Subtract and store: | ||
309 | 32764 | next3old1 = _mm256_sub_ps(next4, next3old1); | |
310 | // Bound: | ||
311 | 32764 | boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS); | |
312 | 32764 | boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust); | |
313 | 32764 | next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS); | |
314 | 32764 | next4 = _mm256_and_ps(next4, negBoundAdjust); | |
315 | 32764 | boundAdjust = _mm256_or_ps(next4, boundAdjust); | |
316 | // Make sure we're in the bounding interval: | ||
317 | 32764 | next3old1 = _mm256_add_ps(next3old1, boundAdjust); | |
318 | _mm256_storeu_ps(outPtr, next3old1); // Store the results back into the output | ||
319 | 32764 | outPtr += 8; | |
320 | } | ||
321 | |||
322 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points; |
323 | 14 | number++) { | |
324 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
325 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | if (*outPtr > bound) |
326 | 4 | *outPtr -= 2 * bound; | |
327 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
328 | 1 | *outPtr += 2 * bound; | |
329 | 14 | inPtr++; | |
330 | 14 | outPtr++; | |
331 | } | ||
332 | |||
333 | 2 | *saveValue = inputVector[num_points - 1]; | |
334 | } | ||
335 | #endif /* LV_HAVE_AVX */ | ||
336 | |||
337 | |||
338 | #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */ | ||
339 |