| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* -*- c++ -*- */ | ||
| 2 | /* | ||
| 3 | * Copyright 2012, 2014 Free Software Foundation, Inc. | ||
| 4 | * | ||
| 5 | * This file is part of VOLK | ||
| 6 | * | ||
| 7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
| 8 | */ | ||
| 9 | |||
| 10 | /*! | ||
| 11 | * \page volk_32f_s32f_32f_fm_detect_32f | ||
| 12 | * | ||
| 13 | * \b Overview | ||
| 14 | * | ||
| 15 | * Performs FM-detect differentiation on the input vector and stores | ||
| 16 | * the results in the output vector. | ||
| 17 | * | ||
| 18 | * <b>Dispatcher Prototype</b> | ||
| 19 | * \code | ||
| 20 | * void volk_32f_s32f_32f_fm_detect_32f(float* outputVector, const float* inputVector, | ||
| 21 | * const float bound, float* saveValue, unsigned int num_points) \endcode | ||
| 22 | * | ||
| 23 | * \b Inputs | ||
| 24 | * \li inputVector: The input vector containing phase data (must be on the interval | ||
| 25 | * (-bound, bound]). \li bound: The interval that the input phase data is in, which is | ||
| 26 | * used to modulo the differentiation. \li saveValue: A pointer to a float which contains | ||
| 27 | * the phase value of the sample before the first input sample. \li num_points The number | ||
| 28 | * of data points. | ||
| 29 | * | ||
| 30 | * \b Outputs | ||
| 31 | * \li outputVector: The vector where the results will be stored. | ||
| 32 | * | ||
| 33 | * \b Example | ||
| 34 | * \code | ||
| 35 | * int N = 10000; | ||
| 36 | * | ||
| 37 | * <FIXME> | ||
| 38 | * | ||
| 39 | * volk_32f_s32f_32f_fm_detect_32f(); | ||
| 40 | * | ||
| 41 | * \endcode | ||
| 42 | */ | ||
| 43 | |||
| 44 | #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H | ||
| 45 | #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H | ||
| 46 | |||
| 47 | #include <inttypes.h> | ||
| 48 | #include <stdio.h> | ||
| 49 | |||
| 50 | #ifdef LV_HAVE_AVX | ||
| 51 | #include <immintrin.h> | ||
| 52 | |||
| 53 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_a_avx(float* outputVector, | |
| 54 | const float* inputVector, | ||
| 55 | const float bound, | ||
| 56 | float* saveValue, | ||
| 57 | unsigned int num_points) | ||
| 58 | { | ||
| 59 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
| 60 | ✗ | return; | |
| 61 | } | ||
| 62 | 2 | unsigned int number = 1; | |
| 63 | 2 | unsigned int j = 0; | |
| 64 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
| 65 | // num_points won't work. :( | ||
| 66 | 2 | const unsigned int eighthPoints = (num_points - 1) / 8; | |
| 67 | |||
| 68 | 2 | float* outPtr = outputVector; | |
| 69 | 2 | const float* inPtr = inputVector; | |
| 70 | 2 | __m256 upperBound = _mm256_set1_ps(bound); | |
| 71 | 2 | __m256 lowerBound = _mm256_set1_ps(-bound); | |
| 72 | __m256 next3old1; | ||
| 73 | __m256 next4; | ||
| 74 | __m256 boundAdjust; | ||
| 75 | 2 | __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above. | |
| 76 | 2 | __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below. | |
| 77 | // Do the first 8 by hand since we're going in from the saveValue: | ||
| 78 | 2 | *outPtr = *inPtr - *saveValue; | |
| 79 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
| 80 | ✗ | *outPtr -= 2 * bound; | |
| 81 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
| 82 | ✗ | *outPtr += 2 * bound; | |
| 83 | 2 | inPtr++; | |
| 84 | 2 | outPtr++; | |
| 85 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) { |
| 86 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 87 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr > bound) |
| 88 | 1 | *outPtr -= 2 * bound; | |
| 89 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
| 90 | 1 | *outPtr += 2 * bound; | |
| 91 | 14 | inPtr++; | |
| 92 | 14 | outPtr++; | |
| 93 | } | ||
| 94 | |||
| 95 |
2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
|
32766 | for (; number < eighthPoints; number++) { |
| 96 | // Load data | ||
| 97 | 65528 | next3old1 = _mm256_loadu_ps((float*)(inPtr - 1)); | |
| 98 | 32764 | next4 = _mm256_load_ps(inPtr); | |
| 99 | 32764 | inPtr += 8; | |
| 100 | // Subtract and store: | ||
| 101 | 32764 | next3old1 = _mm256_sub_ps(next4, next3old1); | |
| 102 | // Bound: | ||
| 103 | 32764 | boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS); | |
| 104 | 32764 | boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust); | |
| 105 | 32764 | next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS); | |
| 106 | 32764 | next4 = _mm256_and_ps(next4, negBoundAdjust); | |
| 107 | 32764 | boundAdjust = _mm256_or_ps(next4, boundAdjust); | |
| 108 | // Make sure we're in the bounding interval: | ||
| 109 | 32764 | next3old1 = _mm256_add_ps(next3old1, boundAdjust); | |
| 110 | _mm256_store_ps(outPtr, next3old1); // Store the results back into the output | ||
| 111 | 32764 | outPtr += 8; | |
| 112 | } | ||
| 113 | |||
| 114 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points; |
| 115 | 14 | number++) { | |
| 116 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 117 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | if (*outPtr > bound) |
| 118 | 4 | *outPtr -= 2 * bound; | |
| 119 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
| 120 | 1 | *outPtr += 2 * bound; | |
| 121 | 14 | inPtr++; | |
| 122 | 14 | outPtr++; | |
| 123 | } | ||
| 124 | |||
| 125 | 2 | *saveValue = inputVector[num_points - 1]; | |
| 126 | } | ||
| 127 | #endif /* LV_HAVE_AVX */ | ||
| 128 | |||
| 129 | |||
| 130 | #ifdef LV_HAVE_SSE | ||
| 131 | #include <xmmintrin.h> | ||
| 132 | |||
| 133 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, | |
| 134 | const float* inputVector, | ||
| 135 | const float bound, | ||
| 136 | float* saveValue, | ||
| 137 | unsigned int num_points) | ||
| 138 | { | ||
| 139 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
| 140 | ✗ | return; | |
| 141 | } | ||
| 142 | 2 | unsigned int number = 1; | |
| 143 | 2 | unsigned int j = 0; | |
| 144 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
| 145 | // num_points won't work. :( | ||
| 146 | 2 | const unsigned int quarterPoints = (num_points - 1) / 4; | |
| 147 | |||
| 148 | 2 | float* outPtr = outputVector; | |
| 149 | 2 | const float* inPtr = inputVector; | |
| 150 | 2 | __m128 upperBound = _mm_set_ps1(bound); | |
| 151 | 2 | __m128 lowerBound = _mm_set_ps1(-bound); | |
| 152 | __m128 next3old1; | ||
| 153 | __m128 next4; | ||
| 154 | __m128 boundAdjust; | ||
| 155 | 2 | __m128 posBoundAdjust = _mm_set_ps1(-2 * bound); // Subtract when we're above. | |
| 156 | 2 | __m128 negBoundAdjust = _mm_set_ps1(2 * bound); // Add when we're below. | |
| 157 | // Do the first 4 by hand since we're going in from the saveValue: | ||
| 158 | 2 | *outPtr = *inPtr - *saveValue; | |
| 159 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
| 160 | ✗ | *outPtr -= 2 * bound; | |
| 161 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
| 162 | ✗ | *outPtr += 2 * bound; | |
| 163 | 2 | inPtr++; | |
| 164 | 2 | outPtr++; | |
| 165 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | for (j = 1; j < ((4 < num_points) ? 4 : num_points); j++) { |
| 166 | 6 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 167 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr > bound) |
| 168 | 1 | *outPtr -= 2 * bound; | |
| 169 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr < -bound) |
| 170 | 1 | *outPtr += 2 * bound; | |
| 171 | 6 | inPtr++; | |
| 172 | 6 | outPtr++; | |
| 173 | } | ||
| 174 | |||
| 175 |
2/2✓ Branch 0 taken 65532 times.
✓ Branch 1 taken 2 times.
|
65534 | for (; number < quarterPoints; number++) { |
| 176 | // Load data | ||
| 177 | 131064 | next3old1 = _mm_loadu_ps((float*)(inPtr - 1)); | |
| 178 | 65532 | next4 = _mm_load_ps(inPtr); | |
| 179 | 65532 | inPtr += 4; | |
| 180 | // Subtract and store: | ||
| 181 | 65532 | next3old1 = _mm_sub_ps(next4, next3old1); | |
| 182 | // Bound: | ||
| 183 | 65532 | boundAdjust = _mm_cmpgt_ps(next3old1, upperBound); | |
| 184 | 65532 | boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust); | |
| 185 | 65532 | next4 = _mm_cmplt_ps(next3old1, lowerBound); | |
| 186 | 65532 | next4 = _mm_and_ps(next4, negBoundAdjust); | |
| 187 | 65532 | boundAdjust = _mm_or_ps(next4, boundAdjust); | |
| 188 | // Make sure we're in the bounding interval: | ||
| 189 | 65532 | next3old1 = _mm_add_ps(next3old1, boundAdjust); | |
| 190 | _mm_store_ps(outPtr, next3old1); // Store the results back into the output | ||
| 191 | 65532 | outPtr += 4; | |
| 192 | } | ||
| 193 | |||
| 194 | 2 | for (number = (4 > (quarterPoints * 4) ? 4 : (4 * quarterPoints)); | |
| 195 |
2/2✓ Branch 0 taken 6 times.
✓ Branch 1 taken 2 times.
|
8 | number < num_points; |
| 196 | 6 | number++) { | |
| 197 | 6 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 198 |
2/2✓ Branch 0 taken 2 times.
✓ Branch 1 taken 4 times.
|
6 | if (*outPtr > bound) |
| 199 | 2 | *outPtr -= 2 * bound; | |
| 200 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 5 times.
|
6 | if (*outPtr < -bound) |
| 201 | 1 | *outPtr += 2 * bound; | |
| 202 | 6 | inPtr++; | |
| 203 | 6 | outPtr++; | |
| 204 | } | ||
| 205 | |||
| 206 | 2 | *saveValue = inputVector[num_points - 1]; | |
| 207 | } | ||
| 208 | #endif /* LV_HAVE_SSE */ | ||
| 209 | |||
| 210 | #ifdef LV_HAVE_GENERIC | ||
| 211 | |||
| 212 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* outputVector, | |
| 213 | const float* inputVector, | ||
| 214 | const float bound, | ||
| 215 | float* saveValue, | ||
| 216 | unsigned int num_points) | ||
| 217 | { | ||
| 218 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
| 219 | ✗ | return; | |
| 220 | } | ||
| 221 | 2 | unsigned int number = 0; | |
| 222 | 2 | float* outPtr = outputVector; | |
| 223 | 2 | const float* inPtr = inputVector; | |
| 224 | |||
| 225 | // Do the first 1 by hand since we're going in from the saveValue: | ||
| 226 | 2 | *outPtr = *inPtr - *saveValue; | |
| 227 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
| 228 | ✗ | *outPtr -= 2 * bound; | |
| 229 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
| 230 | ✗ | *outPtr += 2 * bound; | |
| 231 | 2 | inPtr++; | |
| 232 | 2 | outPtr++; | |
| 233 | |||
| 234 |
2/2✓ Branch 0 taken 262140 times.
✓ Branch 1 taken 2 times.
|
262142 | for (number = 1; number < num_points; number++) { |
| 235 | 262140 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 236 |
2/2✓ Branch 0 taken 32572 times.
✓ Branch 1 taken 229568 times.
|
262140 | if (*outPtr > bound) |
| 237 | 32572 | *outPtr -= 2 * bound; | |
| 238 |
2/2✓ Branch 0 taken 32879 times.
✓ Branch 1 taken 229261 times.
|
262140 | if (*outPtr < -bound) |
| 239 | 32879 | *outPtr += 2 * bound; | |
| 240 | 262140 | inPtr++; | |
| 241 | 262140 | outPtr++; | |
| 242 | } | ||
| 243 | |||
| 244 | 2 | *saveValue = inputVector[num_points - 1]; | |
| 245 | } | ||
| 246 | #endif /* LV_HAVE_GENERIC */ | ||
| 247 | |||
| 248 | |||
| 249 | #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */ | ||
| 250 | |||
| 251 | |||
| 252 | #ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H | ||
| 253 | #define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H | ||
| 254 | |||
| 255 | #include <inttypes.h> | ||
| 256 | #include <stdio.h> | ||
| 257 | |||
| 258 | #ifdef LV_HAVE_AVX | ||
| 259 | #include <immintrin.h> | ||
| 260 | |||
| 261 | 2 | static inline void volk_32f_s32f_32f_fm_detect_32f_u_avx(float* outputVector, | |
| 262 | const float* inputVector, | ||
| 263 | const float bound, | ||
| 264 | float* saveValue, | ||
| 265 | unsigned int num_points) | ||
| 266 | { | ||
| 267 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (num_points < 1) { |
| 268 | ✗ | return; | |
| 269 | } | ||
| 270 | 2 | unsigned int number = 1; | |
| 271 | 2 | unsigned int j = 0; | |
| 272 | // num_points-1 keeps Fedora 7's gcc from crashing... | ||
| 273 | // num_points won't work. :( | ||
| 274 | 2 | const unsigned int eighthPoints = (num_points - 1) / 8; | |
| 275 | |||
| 276 | 2 | float* outPtr = outputVector; | |
| 277 | 2 | const float* inPtr = inputVector; | |
| 278 | 2 | __m256 upperBound = _mm256_set1_ps(bound); | |
| 279 | 2 | __m256 lowerBound = _mm256_set1_ps(-bound); | |
| 280 | __m256 next3old1; | ||
| 281 | __m256 next4; | ||
| 282 | __m256 boundAdjust; | ||
| 283 | 2 | __m256 posBoundAdjust = _mm256_set1_ps(-2 * bound); // Subtract when we're above. | |
| 284 | 2 | __m256 negBoundAdjust = _mm256_set1_ps(2 * bound); // Add when we're below. | |
| 285 | // Do the first 8 by hand since we're going in from the saveValue: | ||
| 286 | 2 | *outPtr = *inPtr - *saveValue; | |
| 287 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr > bound) |
| 288 | ✗ | *outPtr -= 2 * bound; | |
| 289 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2 times.
|
2 | if (*outPtr < -bound) |
| 290 | ✗ | *outPtr += 2 * bound; | |
| 291 | 2 | inPtr++; | |
| 292 | 2 | outPtr++; | |
| 293 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (j = 1; j < ((8 < num_points) ? 8 : num_points); j++) { |
| 294 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 295 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr > bound) |
| 296 | 1 | *outPtr -= 2 * bound; | |
| 297 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
| 298 | 1 | *outPtr += 2 * bound; | |
| 299 | 14 | inPtr++; | |
| 300 | 14 | outPtr++; | |
| 301 | } | ||
| 302 | |||
| 303 |
2/2✓ Branch 0 taken 32764 times.
✓ Branch 1 taken 2 times.
|
32766 | for (; number < eighthPoints; number++) { |
| 304 | // Load data | ||
| 305 | 65528 | next3old1 = _mm256_loadu_ps((float*)(inPtr - 1)); | |
| 306 | 32764 | next4 = _mm256_loadu_ps(inPtr); | |
| 307 | 32764 | inPtr += 8; | |
| 308 | // Subtract and store: | ||
| 309 | 32764 | next3old1 = _mm256_sub_ps(next4, next3old1); | |
| 310 | // Bound: | ||
| 311 | 32764 | boundAdjust = _mm256_cmp_ps(next3old1, upperBound, _CMP_GT_OS); | |
| 312 | 32764 | boundAdjust = _mm256_and_ps(boundAdjust, posBoundAdjust); | |
| 313 | 32764 | next4 = _mm256_cmp_ps(next3old1, lowerBound, _CMP_LT_OS); | |
| 314 | 32764 | next4 = _mm256_and_ps(next4, negBoundAdjust); | |
| 315 | 32764 | boundAdjust = _mm256_or_ps(next4, boundAdjust); | |
| 316 | // Make sure we're in the bounding interval: | ||
| 317 | 32764 | next3old1 = _mm256_add_ps(next3old1, boundAdjust); | |
| 318 | _mm256_storeu_ps(outPtr, next3old1); // Store the results back into the output | ||
| 319 | 32764 | outPtr += 8; | |
| 320 | } | ||
| 321 | |||
| 322 |
2/2✓ Branch 0 taken 14 times.
✓ Branch 1 taken 2 times.
|
16 | for (number = (8 > (eighthPoints * 8) ? 8 : (8 * eighthPoints)); number < num_points; |
| 323 | 14 | number++) { | |
| 324 | 14 | *outPtr = *(inPtr) - *(inPtr - 1); | |
| 325 |
2/2✓ Branch 0 taken 4 times.
✓ Branch 1 taken 10 times.
|
14 | if (*outPtr > bound) |
| 326 | 4 | *outPtr -= 2 * bound; | |
| 327 |
2/2✓ Branch 0 taken 1 times.
✓ Branch 1 taken 13 times.
|
14 | if (*outPtr < -bound) |
| 328 | 1 | *outPtr += 2 * bound; | |
| 329 | 14 | inPtr++; | |
| 330 | 14 | outPtr++; | |
| 331 | } | ||
| 332 | |||
| 333 | 2 | *saveValue = inputVector[num_points - 1]; | |
| 334 | } | ||
| 335 | #endif /* LV_HAVE_AVX */ | ||
| 336 | |||
| 337 | |||
| 338 | #endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_u_H */ | ||
| 339 |