GCC Code Coverage Report


Directory: ./
File: include/volk/volk_common.h
Date: 2023-10-23 23:10:04
Exec Total Coverage
Lines: 25 26 96.2%
Functions: 3 3 100.0%
Branches: 2 4 50.0%

Line Branch Exec Source
1 /* -*- c++ -*- */
2 /*
3 * Copyright 2010, 2011, 2015-2017, 2019, 2020 Free Software Foundation, Inc.
4 * Copyright 2023 Magnus Lundmark <magnuslundmark@gmail.com>
5 *
6 * This file is part of VOLK
7 *
8 * SPDX-License-Identifier: LGPL-3.0-or-later
9 */
10
11 #ifndef INCLUDED_LIBVOLK_COMMON_H
12 #define INCLUDED_LIBVOLK_COMMON_H
13
////////////////////////////////////////////////////////////////////////
// Cross-platform attribute macros
//
// Every branch below defines the same macro set, so the rest of the
// library can use one spelling regardless of the compiler:
//   __VOLK_ATTR_ALIGNED(x)     alignment specifier for a declaration
//   __VOLK_ATTR_UNUSED         marks an entity as intentionally unused
//   __VOLK_ATTR_INLINE         forced-inline specifier
//   __VOLK_ATTR_DEPRECATED     deprecation marker
//   __VOLK_ATTR_EXPORT/IMPORT  symbol visibility / DLL linkage
//   __VOLK_PREFETCH(addr)      prefetch hint; expands to nothing where
//                              no builtin is used
//   __VOLK_ASM/__VOLK_VOLATILE inline-assembly keywords
//
// _MSC_VER is tested exactly once and first, so compilers that define it
// always take the MSVC branch.
////////////////////////////////////////////////////////////////////////
#if defined(_MSC_VER)
#define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE __forceinline
#define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
#define __VOLK_ATTR_EXPORT __declspec(dllexport)
#define __VOLK_ATTR_IMPORT __declspec(dllimport)
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm
#define __VOLK_VOLATILE
#elif defined(__clang__)
// AppleClang also defines __GNUC__, so do this check first. These
// will probably be the same as for __GNUC__, but let's keep them
// separate just to be safe.
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#elif defined(__GNUC__)
#define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
#define __VOLK_ATTR_UNUSED __attribute__((unused))
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
// The visibility attribute is only used for GCC major version 4 and later.
#if __GNUC__ >= 4
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#else
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#endif
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
#else
// Unknown compiler: attributes expand to nothing; the assembly keywords
// keep their GNU spelling.
#define __VOLK_ATTR_ALIGNED(x)
#define __VOLK_ATTR_UNUSED
#define __VOLK_ATTR_INLINE
#define __VOLK_ATTR_DEPRECATED
#define __VOLK_ATTR_EXPORT
#define __VOLK_ATTR_IMPORT
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#endif
76
77 ////////////////////////////////////////////////////////////////////////
78 // Ignore annoying warnings in MSVC
79 ////////////////////////////////////////////////////////////////////////
// Only compilers that define _MSC_VER see these pragmas. No matching
// warning(push)/warning(pop) is visible in this block, so the two
// warnings are not re-enabled here after being disabled.
80 #if defined(_MSC_VER)
81 #pragma warning(disable : 4244) //'conversion' conversion from 'type1' to 'type2',
82 // possible loss of data
83 #pragma warning(disable : 4305) //'identifier' : truncation from 'type1' to 'type2'
84 #endif
85
////////////////////////////////////////////////////////////////////////
// C-linkage declaration macros
// FIXME: due to the usage of complex.h, require gcc for c-linkage
//
// Wrap declarations in __VOLK_DECL_BEGIN / __VOLK_DECL_END. The pair
// opens and closes an extern "C" block only for C++ translation units
// built by a compiler that defines __GNUC__; in every other case both
// macros expand to nothing.
//
// Both conditions use defined(), so a compiler that does not define
// __GNUC__ never has an undefined identifier evaluated inside #if.
////////////////////////////////////////////////////////////////////////
#if defined(__cplusplus) && defined(__GNUC__)
#define __VOLK_DECL_BEGIN extern "C" {
#define __VOLK_DECL_END }
#else
#define __VOLK_DECL_BEGIN
#define __VOLK_DECL_END
#endif
97
98 ////////////////////////////////////////////////////////////////////////
99 // Define VOLK_API for library symbols
100 // http://gcc.gnu.org/wiki/Visibility
101 ////////////////////////////////////////////////////////////////////////
// VOLK_API expands to __VOLK_ATTR_EXPORT when volk_EXPORTS is defined
// and to __VOLK_ATTR_IMPORT otherwise.
// NOTE(review): volk_EXPORTS is presumably defined by the build system
// while compiling the library itself - confirm against the build files.
102 #ifdef volk_EXPORTS
103 #define VOLK_API __VOLK_ATTR_EXPORT
104 #else
105 #define VOLK_API __VOLK_ATTR_IMPORT
106 #endif
107
108 ////////////////////////////////////////////////////////////////////////
109 // The bit128 union used by some
110 ////////////////////////////////////////////////////////////////////////
111 #include <stdint.h>
112
113 #ifdef LV_HAVE_SSE
114 #ifdef _WIN32
115 #include <intrin.h>
116 #else
117 #include <x86intrin.h>
118 #endif
119 #endif
120
// Overlays a single block of storage with several element views:
// 16 x uint8_t, 8 x uint16_t, 4 x uint32_t, 4 x float and 2 x double.
// The SIMD vector members are only present when the matching LV_HAVE_*
// macro is defined: __m128 needs LV_HAVE_SSE, __m128i/__m128d need
// LV_HAVE_SSE2.
121 union bit128 {
122 uint8_t i8[16];
123 uint16_t i16[8];
124 uint32_t i[4];
125 float f[4];
126 double d[2];
127
128 #ifdef LV_HAVE_SSE
129 __m128 float_vec;
130 #endif
131
132 #ifdef LV_HAVE_SSE2
133 __m128i int_vec;
134 __m128d double_vec;
135 #endif
136 };
137
// Wider counterpart of union bit128 with twice as many elements in each
// view: 32 x uint8_t, 16 x uint16_t, 8 x uint32_t, 8 x float and
// 4 x double. The __m256, __m256i and __m256d members are only present
// when LV_HAVE_AVX is defined.
138 union bit256 {
139 uint8_t i8[32];
140 uint16_t i16[16];
141 uint32_t i[8];
142 float f[8];
143 double d[4];
144
145 #ifdef LV_HAVE_AVX
146 __m256 float_vec;
147 __m256i int_vec;
148 __m256d double_vec;
149 #endif
150 };
151
// Cast an arbitrary pointer expression to a pointer to union bit128 /
// union bit256. The argument is parenthesised before the cast. These are
// plain casts: they do not check the alignment or the size of the
// storage that x points to.
152 #define bit128_p(x) ((union bit128*)(x))
153 #define bit256_p(x) ((union bit256*)(x))
154
155 ////////////////////////////////////////////////////////////////////////
156 // log2f
157 ////////////////////////////////////////////////////////////////////////
158 #include <math.h>
159 // +-Inf -> +-127.0f in order to match the behaviour of the SIMD kernels
160 1310758 static inline float log2f_non_ieee(float f)
161 {
162 1310758 float const result = log2f(f);
163
1/2
✓ Branch 0 taken 1310758 times.
✗ Branch 1 not taken.
1310758 return isinf(result) ? copysignf(127.0f, result) : result;
164 }
165
166 ////////////////////////////////////////////////////////////////////////
167 // Constant used to do log10 calculations as faster log2
168 ////////////////////////////////////////////////////////////////////////
// Multiplying a base-2 logarithm by this factor gives 10 * log10 of the
// same value, because 10 * log10(x) == (10 / log2(10)) * log2(x).
// The hexadecimal literal has no 'f' suffix, so its type is double.
169 // precalculated 10.0 / log2f_non_ieee(10.0) to allow for constexpr
170 #define volk_log2to10factor (0x1.815182p1) // 3.01029995663981209120
171
172 ////////////////////////////////////////////////////////////////////////
173 // arctan(x)
174 ////////////////////////////////////////////////////////////////////////
175 262210 static inline float volk_arctan_poly(const float x)
176 {
177 /*
178 * arctan(x) polynomial expansion on the interval [-1, 1]
179 * Maximum relative error < 6.6e-7
180 */
181 262210 const float a1 = +0x1.ffffeap-1f;
182 262210 const float a3 = -0x1.55437p-2f;
183 262210 const float a5 = +0x1.972be6p-3f;
184 262210 const float a7 = -0x1.1436ap-3f;
185 262210 const float a9 = +0x1.5785aap-4f;
186 262210 const float a11 = -0x1.2f3004p-5f;
187 262210 const float a13 = +0x1.01a37cp-7f;
188
189 262210 const float x_times_x = x * x;
190 262210 float arctan = a13;
191 262210 arctan = fmaf(x_times_x, arctan, a11);
192 262210 arctan = fmaf(x_times_x, arctan, a9);
193 262210 arctan = fmaf(x_times_x, arctan, a7);
194 262210 arctan = fmaf(x_times_x, arctan, a5);
195 262210 arctan = fmaf(x_times_x, arctan, a3);
196 262210 arctan = fmaf(x_times_x, arctan, a1);
197 262210 arctan *= x;
198
199 262210 return arctan;
200 }
201
/*
 * Single-precision arctan(x) built on volk_arctan_poly().
 *
 * For |x| < 1 the polynomial is evaluated directly. For every other
 * input (|x| >= 1, and also NaN, for which the comparison is false) the
 * identity
 *     arctan(x) + arctan(1 / x) == sign(x) * pi / 2
 * is used, so the polynomial only ever sees an argument in [-1, 1].
 *
 * fabsf keeps the magnitude test in single precision, like the other
 * math calls in this routine (copysignf) and in volk_arctan_poly (fmaf).
 */
static inline float volk_arctan(const float x)
{
    const float pi_over_2 = 0x1.921fb6p0f;

    if (fabsf(x) < 1.f) {
        return volk_arctan_poly(x);
    } else {
        /* copysignf gives pi/2 the sign of x, i.e. sign(x) * pi / 2. */
        return copysignf(pi_over_2, x) - volk_arctan_poly(1.f / x);
    }
}
215
216 #endif /*INCLUDED_LIBVOLK_COMMON_H*/
217