Line | Branch | Exec | Source |
---|---|---|---|
1 | |||
2 | |||
3 | /* this file was generated by volk template utils, do not edit! */ | ||
4 | |||
5 | /* -*- c++ -*- */ | ||
6 | /* | ||
7 | * Copyright 2011-2012 Free Software Foundation, Inc. | ||
8 | * | ||
9 | * This file is part of VOLK | ||
10 | * | ||
11 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
12 | */ | ||
13 | |||
14 | #include <volk/volk_cpu.h> | ||
15 | #include <volk/volk_config_fixed.h> | ||
16 | #include <stdlib.h> | ||
17 | #include <string.h> | ||
18 | |||
19 | |||
20 | #if defined(VOLK_CPU_FEATURES) | ||
21 | #include "cpu_features_macros.h" | ||
22 | #if defined(CPU_FEATURES_ARCH_X86) | ||
23 | #include "cpuinfo_x86.h" | ||
24 | #elif defined(CPU_FEATURES_ARCH_ARM) | ||
25 | #include "cpuinfo_arm.h" | ||
26 | #elif defined(CPU_FEATURES_ARCH_AARCH64) | ||
27 | #include "cpuinfo_aarch64.h" | ||
28 | #elif defined(CPU_FEATURES_ARCH_MIPS) | ||
29 | #include "cpuinfo_mips.h" | ||
30 | #elif defined(CPU_FEATURES_ARCH_PPC) | ||
31 | #include "cpuinfo_ppc.h" | ||
32 | #elif defined(CPU_FEATURES_ARCH_RISCV) | ||
33 | #include "cpuinfo_riscv.h" | ||
34 | #endif | ||
35 | |||
36 | // This is required for MSVC | ||
37 | #if defined(__cplusplus) | ||
38 | using namespace cpu_features; | ||
39 | #endif | ||
40 | #endif | ||
41 | |||
42 | |||
43 | struct VOLK_CPU volk_cpu; | ||
44 | |||
45 | 3102 | static int i_can_has_generic (void) { | |
46 | 3102 | return 1; | |
47 | } | ||
48 | |||
49 | 3102 | static int i_can_has_softfp (void) { | |
50 | 3102 | return 1; | |
51 | } | ||
52 | |||
53 | 3102 | static int i_can_has_hardfp (void) { | |
54 | 3102 | return 1; | |
55 | } | ||
56 | |||
57 | 3102 | static int i_can_has_32 (void) { | |
58 | 3102 | return 1; | |
59 | } | ||
60 | |||
61 | 3102 | static int i_can_has_64 (void) { | |
62 | 3102 | return 1; | |
63 | } | ||
64 | |||
65 | 3102 | static int i_can_has_popcount (void) { | |
66 | #if defined(CPU_FEATURES_ARCH_X86) | ||
67 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.popcnt == 0){ return 0; } |
68 | #endif | ||
69 | 3102 | return 1; | |
70 | } | ||
71 | |||
72 | 3102 | static int i_can_has_mmx (void) { | |
73 | #if defined(CPU_FEATURES_ARCH_X86) | ||
74 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.mmx == 0){ return 0; } |
75 | #endif | ||
76 | 3102 | return 1; | |
77 | } | ||
78 | |||
79 | 3102 | static int i_can_has_fma (void) { | |
80 | #if defined(CPU_FEATURES_ARCH_X86) | ||
81 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.fma3 == 0){ return 0; } |
82 | #endif | ||
83 | 3102 | return 1; | |
84 | } | ||
85 | |||
86 | 3102 | static int i_can_has_sse (void) { | |
87 | #if defined(CPU_FEATURES_ARCH_X86) | ||
88 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.sse == 0){ return 0; } |
89 | #endif | ||
90 | 3102 | return 1; | |
91 | } | ||
92 | |||
93 | 3102 | static int i_can_has_sse2 (void) { | |
94 | #if defined(CPU_FEATURES_ARCH_X86) | ||
95 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.sse2 == 0){ return 0; } |
96 | #endif | ||
97 | 3102 | return 1; | |
98 | } | ||
99 | |||
100 | 3102 | static int i_can_has_orc (void) { | |
101 | 3102 | return 1; | |
102 | } | ||
103 | |||
104 | 3102 | static int i_can_has_norc (void) { | |
105 | 3102 | return 1; | |
106 | } | ||
107 | |||
108 | 3102 | static int i_can_has_neon (void) { | |
109 | #if defined(CPU_FEATURES_ARCH_ARM) | ||
110 | if (GetArmInfo().features.neon == 0){ return 0; } | ||
111 | #endif | ||
112 | 3102 | return 1; | |
113 | } | ||
114 | |||
115 | 3102 | static int i_can_has_neonv7 (void) { | |
116 | #if defined(CPU_FEATURES_ARCH_ARM) | ||
117 | if (GetArmInfo().features.neon == 0){ return 0; } | ||
118 | #endif | ||
119 | 3102 | return 1; | |
120 | } | ||
121 | |||
122 | 3102 | static int i_can_has_neonv8 (void) { | |
123 | #if defined(CPU_FEATURES_ARCH_ARM) | ||
124 | if (GetArmInfo().features.neon == 0){ return 0; } | ||
125 | #endif | ||
126 | 3102 | return 1; | |
127 | } | ||
128 | |||
129 | 3102 | static int i_can_has_sse3 (void) { | |
130 | #if defined(CPU_FEATURES_ARCH_X86) | ||
131 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.sse3 == 0){ return 0; } |
132 | #endif | ||
133 | 3102 | return 1; | |
134 | } | ||
135 | |||
136 | 3102 | static int i_can_has_ssse3 (void) { | |
137 | #if defined(CPU_FEATURES_ARCH_X86) | ||
138 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.ssse3 == 0){ return 0; } |
139 | #endif | ||
140 | 3102 | return 1; | |
141 | } | ||
142 | |||
143 | 3102 | static int i_can_has_sse4_a (void) { | |
144 | #if defined(CPU_FEATURES_ARCH_X86) | ||
145 |
1/2✓ Branch 1 taken 3102 times.
✗ Branch 2 not taken.
|
3102 | if (GetX86Info().features.sse4a == 0){ return 0; } |
146 | #endif | ||
147 | ✗ | return 1; | |
148 | } | ||
149 | |||
150 | 3102 | static int i_can_has_sse4_1 (void) { | |
151 | #if defined(CPU_FEATURES_ARCH_X86) | ||
152 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.sse4_1 == 0){ return 0; } |
153 | #endif | ||
154 | 3102 | return 1; | |
155 | } | ||
156 | |||
157 | 3102 | static int i_can_has_sse4_2 (void) { | |
158 | #if defined(CPU_FEATURES_ARCH_X86) | ||
159 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.sse4_2 == 0){ return 0; } |
160 | #endif | ||
161 | 3102 | return 1; | |
162 | } | ||
163 | |||
164 | 3102 | static int i_can_has_avx (void) { | |
165 | #if defined(CPU_FEATURES_ARCH_X86) | ||
166 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.avx == 0){ return 0; } |
167 | #endif | ||
168 | 3102 | return 1; | |
169 | } | ||
170 | |||
171 | 3102 | static int i_can_has_avx2 (void) { | |
172 | #if defined(CPU_FEATURES_ARCH_X86) | ||
173 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 3102 times.
|
3102 | if (GetX86Info().features.avx2 == 0){ return 0; } |
174 | #endif | ||
175 | 3102 | return 1; | |
176 | } | ||
177 | |||
178 | 3102 | static int i_can_has_avx512f (void) { | |
179 | #if defined(CPU_FEATURES_ARCH_X86) | ||
180 |
1/2✓ Branch 1 taken 3102 times.
✗ Branch 2 not taken.
|
3102 | if (GetX86Info().features.avx512f == 0){ return 0; } |
181 | #endif | ||
182 | ✗ | return 1; | |
183 | } | ||
184 | |||
185 | 3102 | static int i_can_has_avx512cd (void) { | |
186 | #if defined(CPU_FEATURES_ARCH_X86) | ||
187 |
1/2✓ Branch 1 taken 3102 times.
✗ Branch 2 not taken.
|
3102 | if (GetX86Info().features.avx512cd == 0){ return 0; } |
188 | #endif | ||
189 | ✗ | return 1; | |
190 | } | ||
191 | |||
192 | 3102 | static int i_can_has_riscv64 (void) { | |
193 | 3102 | return 1; | |
194 | } | ||
195 | |||
196 | |||
197 | #if defined(HAVE_FENV_H) | ||
198 | #if defined(FE_TONEAREST) | ||
199 | #include <fenv.h> | ||
200 | static inline void set_float_rounding(void){ | ||
201 | fesetround(FE_TONEAREST); | ||
202 | } | ||
203 | #else | ||
204 | 3102 | static inline void set_float_rounding(void){ | |
205 | //do nothing | ||
206 | 3102 | } | |
207 | #endif | ||
208 | #elif defined(_MSC_VER) | ||
209 | #include <float.h> | ||
210 | static inline void set_float_rounding(void){ | ||
211 | unsigned int cwrd; | ||
212 | _controlfp_s(&cwrd, 0, 0); | ||
213 | _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC); | ||
214 | } | ||
215 | #else | ||
216 | static inline void set_float_rounding(void){ | ||
217 | //do nothing | ||
218 | } | ||
219 | #endif | ||
220 | |||
221 | |||
222 | 3102 | void volk_cpu_init() { | |
223 | 3102 | volk_cpu.has_generic = &i_can_has_generic; | |
224 | 3102 | volk_cpu.has_softfp = &i_can_has_softfp; | |
225 | 3102 | volk_cpu.has_hardfp = &i_can_has_hardfp; | |
226 | 3102 | volk_cpu.has_32 = &i_can_has_32; | |
227 | 3102 | volk_cpu.has_64 = &i_can_has_64; | |
228 | 3102 | volk_cpu.has_popcount = &i_can_has_popcount; | |
229 | 3102 | volk_cpu.has_mmx = &i_can_has_mmx; | |
230 | 3102 | volk_cpu.has_fma = &i_can_has_fma; | |
231 | 3102 | volk_cpu.has_sse = &i_can_has_sse; | |
232 | 3102 | volk_cpu.has_sse2 = &i_can_has_sse2; | |
233 | 3102 | volk_cpu.has_orc = &i_can_has_orc; | |
234 | 3102 | volk_cpu.has_norc = &i_can_has_norc; | |
235 | 3102 | volk_cpu.has_neon = &i_can_has_neon; | |
236 | 3102 | volk_cpu.has_neonv7 = &i_can_has_neonv7; | |
237 | 3102 | volk_cpu.has_neonv8 = &i_can_has_neonv8; | |
238 | 3102 | volk_cpu.has_sse3 = &i_can_has_sse3; | |
239 | 3102 | volk_cpu.has_ssse3 = &i_can_has_ssse3; | |
240 | 3102 | volk_cpu.has_sse4_a = &i_can_has_sse4_a; | |
241 | 3102 | volk_cpu.has_sse4_1 = &i_can_has_sse4_1; | |
242 | 3102 | volk_cpu.has_sse4_2 = &i_can_has_sse4_2; | |
243 | 3102 | volk_cpu.has_avx = &i_can_has_avx; | |
244 | 3102 | volk_cpu.has_avx2 = &i_can_has_avx2; | |
245 | 3102 | volk_cpu.has_avx512f = &i_can_has_avx512f; | |
246 | 3102 | volk_cpu.has_avx512cd = &i_can_has_avx512cd; | |
247 | 3102 | volk_cpu.has_riscv64 = &i_can_has_riscv64; | |
248 | 3102 | set_float_rounding(); | |
249 | 3102 | } | |
250 | |||
251 | 3102 | unsigned int volk_get_lvarch() { | |
252 | 3102 | unsigned int retval = 0; | |
253 | 3102 | volk_cpu_init(); | |
254 | 3102 | retval += volk_cpu.has_generic() << LV_GENERIC; | |
255 | 3102 | retval += volk_cpu.has_softfp() << LV_SOFTFP; | |
256 | 3102 | retval += volk_cpu.has_hardfp() << LV_HARDFP; | |
257 | 3102 | retval += volk_cpu.has_32() << LV_32; | |
258 | 3102 | retval += volk_cpu.has_64() << LV_64; | |
259 | 3102 | retval += volk_cpu.has_popcount() << LV_POPCOUNT; | |
260 | 3102 | retval += volk_cpu.has_mmx() << LV_MMX; | |
261 | 3102 | retval += volk_cpu.has_fma() << LV_FMA; | |
262 | 3102 | retval += volk_cpu.has_sse() << LV_SSE; | |
263 | 3102 | retval += volk_cpu.has_sse2() << LV_SSE2; | |
264 | 3102 | retval += volk_cpu.has_orc() << LV_ORC; | |
265 | 3102 | retval += volk_cpu.has_norc() << LV_NORC; | |
266 | 3102 | retval += volk_cpu.has_neon() << LV_NEON; | |
267 | 3102 | retval += volk_cpu.has_neonv7() << LV_NEONV7; | |
268 | 3102 | retval += volk_cpu.has_neonv8() << LV_NEONV8; | |
269 | 3102 | retval += volk_cpu.has_sse3() << LV_SSE3; | |
270 | 3102 | retval += volk_cpu.has_ssse3() << LV_SSSE3; | |
271 | 3102 | retval += volk_cpu.has_sse4_a() << LV_SSE4_A; | |
272 | 3102 | retval += volk_cpu.has_sse4_1() << LV_SSE4_1; | |
273 | 3102 | retval += volk_cpu.has_sse4_2() << LV_SSE4_2; | |
274 | 3102 | retval += volk_cpu.has_avx() << LV_AVX; | |
275 | 3102 | retval += volk_cpu.has_avx2() << LV_AVX2; | |
276 | 3102 | retval += volk_cpu.has_avx512f() << LV_AVX512F; | |
277 | 3102 | retval += volk_cpu.has_avx512cd() << LV_AVX512CD; | |
278 | 3102 | retval += volk_cpu.has_riscv64() << LV_RISCV64; | |
279 | 3102 | return retval; | |
280 | } | ||
281 |