GCC Code Coverage Report


Directory: ./
File: include/volk/volk_sse3_intrinsics.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 22 22 100.0%
Functions: 5 5 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2015 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*
11 * This file is intended to hold helper functions composed of SSE3 intrinsics.
12 * They should be used in VOLK kernels to avoid copy-pasted duplicates.
13 */
14
15 #ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
16 #define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
17 #include <pmmintrin.h>
18
19 1048556 static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y)
20 { /* Packed complex multiply: with x = (ar,ai,br,bi) and y = (cr,ci,dr,di), returns (a*c, b*d) as interleaved re,im. */
21 __m128 yl, yh, tmp1, tmp2;
22 1048556 yl = _mm_moveldup_ps(y); // yl = cr,cr,dr,dr (real parts of y duplicated)
23 1048556 yh = _mm_movehdup_ps(y); // yh = ci,ci,di,di (imaginary parts of y duplicated)
24 1048556 tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
25 1048556 x = _mm_shuffle_ps(x, x, 0xB1); // 0xB1 swaps the two lanes of each re/im pair: x = ai,ar,bi,br
26 1048556 tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
27 1048556 return _mm_addsub_ps(tmp1,
28 tmp2); // subtract in even lanes, add in odd lanes: ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
29 }
30
31 786416 static inline __m128 _mm_complexconjugatemul_ps(__m128 x, __m128 y)
32 { /* Returns x * conj(y) for the packed complex values in x and y (same interleaved re,im layout as _mm_complexmul_ps). */
33 786416 const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); // -0.f sets only the sign bit, here in the odd (imaginary) lanes
34 786416 y = _mm_xor_ps(y, conjugator); // conjugate y: XOR flips the sign of each imaginary part, real parts unchanged
35 786416 return _mm_complexmul_ps(x, y);
36 }
37
38 524272 static inline __m128 _mm_magnitudesquared_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
39 { /* Returns I^2 + Q^2 for four complex values (assumes two interleaved I,Q pairs per input): lanes 0-1 from cplxValue1, lanes 2-3 from cplxValue2. */
40 524272 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square every lane of the first input
41 524272 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square every lane of the second input
42 524272 return _mm_hadd_ps(cplxValue1, cplxValue2); // Sum adjacent lanes: I^2 + Q^2 per complex value
43 }
44
45 131068 static inline __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
46 { /* Returns the lane-wise square root of _mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2), i.e. four magnitudes in the same lane order. */
47 262136 return _mm_sqrt_ps(_mm_magnitudesquared_ps_sse3(cplxValue1, cplxValue2));
48 }
49
50 131068 static inline __m128 _mm_scaled_norm_dist_ps_sse3(const __m128 symbols0,
51 const __m128 symbols1,
52 const __m128 points0,
53 const __m128 points1,
54 const __m128 scalar)
55 {
56 /*
57 * Calculate: |y - x|^2 * SNR_lin, where 'scalar' supplies SNR_lin and is applied lane-wise.
58 * Consider 'symbolsX' and 'pointsX' to be complex float (interleaved re,im);
59 * 'symbolsX' are 'y' and 'pointsX' are 'x'. Result lanes 0-1 come from pair 0, lanes 2-3 from pair 1.
60 */
61 131068 const __m128 diff0 = _mm_sub_ps(symbols0, points0); // y - x for the first register pair
62 131068 const __m128 diff1 = _mm_sub_ps(symbols1, points1); // y - x for the second register pair
63 131068 const __m128 norms = _mm_magnitudesquared_ps_sse3(diff0, diff1); // |y - x|^2, one value per complex difference
64 131068 return _mm_mul_ps(norms, scalar); // scale each squared distance by the matching lane of 'scalar'
65 }
66
67 #endif /* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */
68