| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | | | /* -*- c++ -*- */ |
| 2 | | | /* |
| 3 | | | * Copyright 2015 Free Software Foundation, Inc. |
| 4 | | | * |
| 5 | | | * This file is part of VOLK |
| 6 | | | * |
| 7 | | | * SPDX-License-Identifier: LGPL-3.0-or-later |
| 8 | | | */ |
| 9 | | | |
| 10 | | | /* |
| 11 | | | * This file is intended to hold SSE3 intrinsics of intrinsics. |
| 12 | | | * They should be used in VOLK kernels to avoid copy-pasta. |
| 13 | | | */ |
| 14 | | | |
| 15 | | | #ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ |
| 16 | | | #define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ |
| 17 | | | #include <pmmintrin.h> |
| 18 | | | |
| 19 | | 1048556 | static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) |
| 20 | | | { |
| 21 | | | __m128 yl, yh, tmp1, tmp2; |
| 22 | | 1048556 | yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr |
| 23 | | 1048556 | yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di |
| 24 | | 1048556 | tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr |
| 25 | | 1048556 | x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br |
| 26 | | 1048556 | tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di |
| 27 | | 1048556 | return _mm_addsub_ps(tmp1, |
| 28 | | | tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di |
| 29 | | | } |
| 30 | |||
| 31 | | 786416 | static inline __m128 _mm_complexconjugatemul_ps(__m128 x, __m128 y) |
| 32 | | | { |
| 33 | | 786416 | const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); |
| 34 | | 786416 | y = _mm_xor_ps(y, conjugator); // conjugate y |
| 35 | | 786416 | return _mm_complexmul_ps(x, y); |
| 36 | | | } |
| 37 | |||
| 38 | | 524272 | static inline __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2) |
| 39 | | | { |
| 40 | | 524272 | cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values |
| 41 | | 524272 | cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values |
| 42 | | 524272 | return _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values |
| 43 | | | } |
| 44 | |||
| 45 | | 131068 | static inline __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2) |
| 46 | | | { |
| 47 | | 262136 | return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2)); |
| 48 | | | } |
| 49 | |||
| 50 | | 131068 | static inline __m128 _mm_scaled_norm_dist_ps_sse3(const __m128 symbols0, |
| 51 | | | const __m128 symbols1, |
| 52 | | | const __m128 points0, |
| 53 | | | const __m128 points1, |
| 54 | | | const __m128 scalar) |
| 55 | | | { |
| 56 | | | /* |
| 57 | | | * Calculate: |y - x|^2 * SNR_lin |
| 58 | | | * Consider 'symbolsX' and 'pointsX' to be complex float |
| 59 | | | * 'symbolsX' are 'y' and 'pointsX' are 'x' |
| 60 | | | */ |
| 61 | | 131068 | const __m128 diff0 = _mm_sub_ps(symbols0, points0); |
| 62 | | 131068 | const __m128 diff1 = _mm_sub_ps(symbols1, points1); |
| 63 | | 131068 | const __m128 norms = _mm_magnitudesquared_ps_sse3(diff0, diff1); |
| 64 | | 131068 | return _mm_mul_ps(norms, scalar); |
| 65 | | | } |
| 66 | |||
| 67 | | | #endif /* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */ |
| 68 | | | |