GCC Code Coverage Report


Directory: ./
File: include/volk/volk_avx2_fma_intrinsics.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 18 18 100.0%
Functions: 1 1 100.0%
Branches: 0 0 -%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2023 Magnus Lundmark <magnuslundmark@gmail.com>
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10 /*
11 * This file is intended to hold helper functions built from AVX2 and FMA intrinsics.
12 * They should be used in VOLK kernels to avoid copy-paste.
13 */
14
15 #ifndef INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_
16 #define INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_
17 #include <immintrin.h>
18
19 /*
20 * Approximate arctan(x) via polynomial expansion
21 * on the interval [-1, 1]
22 *
23 * Maximum relative error ~6.5e-7
24 * Odd polynomial (terms x^1 through x^13) evaluated in x*x via Horner's method with fused multiply-adds
25 */
26 65532 static inline __m256 _m256_arctan_poly_avx2_fma(const __m256 x)
27 {
28 65532 const __m256 a1 = _mm256_set1_ps(+0x1.ffffeap-1f);
29 65532 const __m256 a3 = _mm256_set1_ps(-0x1.55437p-2f);
30 65532 const __m256 a5 = _mm256_set1_ps(+0x1.972be6p-3f);
31 65532 const __m256 a7 = _mm256_set1_ps(-0x1.1436ap-3f);
32 65532 const __m256 a9 = _mm256_set1_ps(+0x1.5785aap-4f);
33 65532 const __m256 a11 = _mm256_set1_ps(-0x1.2f3004p-5f);
34 65532 const __m256 a13 = _mm256_set1_ps(+0x1.01a37cp-7f);
35
36 65532 const __m256 x_times_x = _mm256_mul_ps(x, x);
37 __m256 arctan;
38 65532 arctan = a13;
39 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a11);
40 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a9);
41 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a7);
42 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a5);
43 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a3);
44 65532 arctan = _mm256_fmadd_ps(x_times_x, arctan, a1);
45 65532 arctan = _mm256_mul_ps(x, arctan);
46
47 65532 return arctan;
48 }
49
50 #endif /* INCLUDE_VOLK_VOLK_AVX2_FMA_INTRINSICS_H_ */
51