Line | Branch | Exec | Source |
---|---|---|---|
1 | /* -*- c++ -*- */ | ||
2 | /* | ||
3 | * Copyright 2011 - 2020, 2022 Free Software Foundation, Inc. | ||
4 | * | ||
5 | * This file is part of VOLK | ||
6 | * | ||
7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
8 | */ | ||
9 | |||
10 | #include "qa_utils.h" | ||
11 | #include <volk/volk.h> | ||
12 | |||
13 | #include <volk/volk.h> // for volk_func_desc_t | ||
14 | #include <volk/volk_malloc.h> // for volk_free, volk_m... | ||
15 | |||
16 | #include <assert.h> // for assert | ||
17 | #include <stdint.h> // for uint16_t, uint64_t | ||
18 | #include <sys/time.h> // for CLOCKS_PER_SEC | ||
19 | #include <sys/types.h> // for int16_t, int32_t | ||
20 | #include <chrono> | ||
21 | #include <cmath> // for sqrt, fabs, abs | ||
22 | #include <cstring> // for memcpy, memset | ||
23 | #include <ctime> // for clock | ||
24 | #include <fstream> // for operator<<, basic... | ||
25 | #include <iostream> // for cout, cerr | ||
26 | #include <limits> // for numeric_limits | ||
27 | #include <map> // for map, map<>::mappe... | ||
28 | #include <random> | ||
29 | #include <vector> // for vector, _Bit_refe... | ||
30 | |||
// Fill the first `n` elements of `buf`, interpreted as an array of T, with
// values drawn from a uniform real distribution constructed with bounds
// T(-1) and T(1), using the caller's random engine.
template <typename T>
void random_floats(void* buf, unsigned int n, std::default_random_engine& rnd_engine)
{
    std::uniform_real_distribution<T> unit_dist(T(-1), T(1));

    T* cursor = static_cast<T*>(buf);
    T* const stop = cursor + n;
    while (cursor != stop) {
        *cursor = unit_dist(rnd_engine);
        ++cursor;
    }
}
40 | |||
41 | 320 | void load_random_data(void* data, volk_type_t type, unsigned int n) | |
42 | { | ||
43 |
1/2✓ Branch 1 taken 320 times.
✗ Branch 2 not taken.
|
320 | std::random_device rnd_device; |
44 |
2/4✓ Branch 1 taken 320 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 320 times.
✗ Branch 5 not taken.
|
320 | std::default_random_engine rnd_engine(rnd_device()); |
45 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 190 times.
|
320 | if (type.is_complex) |
46 | 130 | n *= 2; | |
47 |
2/2✓ Branch 0 taken 236 times.
✓ Branch 1 taken 84 times.
|
320 | if (type.is_float) { |
48 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 214 times.
|
236 | if (type.size == 8) { |
49 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | random_floats<double>(data, n, rnd_engine); |
50 | } else { | ||
51 |
1/2✓ Branch 1 taken 214 times.
✗ Branch 2 not taken.
|
214 | random_floats<float>(data, n, rnd_engine); |
52 | } | ||
53 | } else { | ||
54 | 84 | float int_max = float(uint64_t(2) << (type.size * 8)); | |
55 |
2/2✓ Branch 0 taken 62 times.
✓ Branch 1 taken 22 times.
|
84 | if (type.is_signed) |
56 | 62 | int_max /= 2.0; | |
57 |
1/2✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
|
84 | std::uniform_real_distribution<float> uniform_dist(-int_max, int_max); |
58 |
2/2✓ Branch 0 taken 16515576 times.
✓ Branch 1 taken 84 times.
|
16515660 | for (unsigned int i = 0; i < n; i++) { |
59 |
1/2✓ Branch 1 taken 16515576 times.
✗ Branch 2 not taken.
|
16515576 | float scaled_rand = uniform_dist(rnd_engine); |
60 | // man i really don't know how to do this in a more clever way, you have to | ||
61 | // cast down at some point | ||
62 |
4/5✓ Branch 0 taken 524304 times.
✓ Branch 1 taken 2097216 times.
✓ Branch 2 taken 7340256 times.
✓ Branch 3 taken 6553800 times.
✗ Branch 4 not taken.
|
16515576 | switch (type.size) { |
63 | 524304 | case 8: | |
64 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 524304 times.
|
524304 | if (type.is_signed) |
65 | ✗ | ((int64_t*)data)[i] = (int64_t)scaled_rand; | |
66 | else | ||
67 | 524304 | ((uint64_t*)data)[i] = (uint64_t)scaled_rand; | |
68 | 524304 | break; | |
69 | 2097216 | case 4: | |
70 |
2/2✓ Branch 0 taken 1310760 times.
✓ Branch 1 taken 786456 times.
|
2097216 | if (type.is_signed) |
71 | 1310760 | ((int32_t*)data)[i] = (int32_t)scaled_rand; | |
72 | else | ||
73 | 786456 | ((uint32_t*)data)[i] = (uint32_t)scaled_rand; | |
74 | 2097216 | break; | |
75 | 7340256 | case 2: | |
76 |
2/2✓ Branch 0 taken 7078104 times.
✓ Branch 1 taken 262152 times.
|
7340256 | if (type.is_signed) |
77 | 7078104 | ((int16_t*)data)[i] = (int16_t)((int16_t)scaled_rand % 8); | |
78 | else | ||
79 | 262152 | ((uint16_t*)data)[i] = (uint16_t)((int16_t)scaled_rand % 8); | |
80 | 7340256 | break; | |
81 | 6553800 | case 1: | |
82 |
2/2✓ Branch 0 taken 5243040 times.
✓ Branch 1 taken 1310760 times.
|
6553800 | if (type.is_signed) |
83 | 5243040 | ((int8_t*)data)[i] = (int8_t)scaled_rand; | |
84 | else | ||
85 | 1310760 | ((uint8_t*)data)[i] = (uint8_t)scaled_rand; | |
86 | 6553800 | break; | |
87 | ✗ | default: | |
88 | ✗ | throw "load_random_data: no support for data size > 8 or < 1"; // no | |
89 | // shenanigans | ||
90 | // here | ||
91 | } | ||
92 | } | ||
93 | } | ||
94 | 320 | } | |
95 | |||
96 | 236 | static std::vector<std::string> get_arch_list(volk_func_desc_t desc) | |
97 | { | ||
98 | 236 | std::vector<std::string> archlist; | |
99 | |||
100 |
2/2✓ Branch 0 taken 1334 times.
✓ Branch 1 taken 236 times.
|
1570 | for (size_t i = 0; i < desc.n_impls; i++) { |
101 |
2/4✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1334 times.
✗ Branch 5 not taken.
|
2668 | archlist.push_back(std::string(desc.impl_names[i])); |
102 | } | ||
103 | |||
104 | 236 | return archlist; | |
105 | ✗ | } | |
106 | |||
// Convert a string consisting solely of the decimal digits '0'..'9' into a T
// using stream extraction.
//
// Throws a string literal (const char*) when
//   - the string is empty,
//   - any character is not a decimal digit (so signs, blanks, and decimal
//     points are rejected), or
//   - the stream extraction fails, e.g. because the number does not fit in T.
template <typename T>
T volk_lexical_cast(const std::string& str)
{
    if (str.empty()) {
        throw "volk_lexical_cast: empty string";
    }
    for (std::string::size_type c_index = 0; c_index < str.size(); ++c_index) {
        const char c = str[c_index];
        if (c < '0' || c > '9') {
            throw "not all numbers!";
        }
    }

    T var = T();
    std::istringstream iss(str);
    iss >> var;
    // Report extraction failures (such as overflow) instead of silently
    // handing back whatever value the stream left in `var`.
    if (iss.fail()) {
        throw "volk_lexical_cast: value cannot be represented in the requested type";
    }
    return var;
}
122 | |||
123 | 966 | volk_type_t volk_type_from_string(std::string name) | |
124 | { | ||
125 | 966 | volk_type_t type; | |
126 | 966 | type.is_float = false; | |
127 | 966 | type.is_scalar = false; | |
128 | 966 | type.is_complex = false; | |
129 | 966 | type.is_signed = false; | |
130 | 966 | type.size = 0; | |
131 |
1/2✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
|
966 | type.str = name; |
132 | |||
133 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 966 times.
|
966 | if (name.size() < 2) { |
134 | ✗ | throw std::string("name too short to be a datatype"); | |
135 | } | ||
136 | |||
137 | // is it a scalar? | ||
138 |
3/4✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 96 times.
✓ Branch 4 taken 870 times.
|
966 | if (name[0] == 's') { |
139 | 96 | type.is_scalar = true; | |
140 |
1/2✓ Branch 2 taken 96 times.
✗ Branch 3 not taken.
|
96 | name = name.substr(1, name.size() - 1); |
141 | } | ||
142 | |||
143 | // get the data size | ||
144 | 966 | size_t last_size_pos = name.find_last_of("0123456789"); | |
145 |
2/2✓ Branch 0 taken 332 times.
✓ Branch 1 taken 634 times.
|
966 | if (last_size_pos == std::string::npos) { |
146 |
3/6✓ Branch 2 taken 332 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 332 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 332 times.
✗ Branch 9 not taken.
|
996 | throw std::string("no size spec in type ").append(name); |
147 | } | ||
148 | // will throw if malformed | ||
149 |
3/4✓ Branch 1 taken 634 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 546 times.
✓ Branch 5 taken 88 times.
|
722 | int size = volk_lexical_cast<int>(name.substr(0, last_size_pos + 1)); |
150 | |||
151 |
3/6✓ Branch 0 taken 546 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 546 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 546 times.
✗ Branch 5 not taken.
|
546 | assert(((size % 8) == 0) && (size <= 64) && (size != 0)); |
152 | 546 | type.size = size / 8; // in bytes | |
153 | |||
154 |
2/2✓ Branch 1 taken 704 times.
✓ Branch 2 taken 546 times.
|
1250 | for (size_t i = last_size_pos + 1; i < name.size(); i++) { |
155 |
5/7✓ Branch 1 taken 704 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 396 times.
✓ Branch 4 taken 98 times.
✓ Branch 5 taken 158 times.
✓ Branch 6 taken 52 times.
✗ Branch 7 not taken.
|
704 | switch (name[i]) { |
156 | 396 | case 'f': | |
157 | 396 | type.is_float = true; | |
158 | 396 | break; | |
159 | 98 | case 'i': | |
160 | 98 | type.is_signed = true; | |
161 | 98 | break; | |
162 | 158 | case 'c': | |
163 | 158 | type.is_complex = true; | |
164 | 158 | break; | |
165 | 52 | case 'u': | |
166 | 52 | type.is_signed = false; | |
167 | 52 | break; | |
168 | ✗ | default: | |
169 | ✗ | throw std::string("Error: no such type: '") + name[i] + "'"; | |
170 | } | ||
171 | } | ||
172 | |||
173 | 546 | return type; | |
174 | 420 | } | |
175 | |||
// Split a kernel name on '_' characters.  Every underscore ends a token, so
// consecutive, leading, or trailing underscores produce empty tokens, and the
// result always holds at least one (possibly empty) token.
std::vector<std::string> split_signature(const std::string& protokernel_signature)
{
    std::vector<std::string> pieces;

    std::string::size_type piece_start = 0;
    std::string::size_type underscore = protokernel_signature.find('_', piece_start);
    while (underscore != std::string::npos) {
        pieces.push_back(
            protokernel_signature.substr(piece_start, underscore - piece_start));
        piece_start = underscore + 1;
        underscore = protokernel_signature.find('_', piece_start);
    }

    // whatever follows the final underscore (or the whole string) is the last token
    pieces.push_back(protokernel_signature.substr(piece_start));
    return pieces;
}
193 | |||
194 | 236 | static void get_signatures_from_name(std::vector<volk_type_t>& inputsig, | |
195 | std::vector<volk_type_t>& outputsig, | ||
196 | std::string name) | ||
197 | { | ||
198 | |||
199 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::vector<std::string> toked = split_signature(name); |
200 | |||
201 |
2/4✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 236 times.
|
236 | assert(toked[0] == "volk"); |
202 |
1/2✓ Branch 3 taken 236 times.
✗ Branch 4 not taken.
|
236 | toked.erase(toked.begin()); |
203 | |||
204 | // ok. we're assuming a string in the form | ||
205 | //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) | ||
206 | |||
207 | 236 | enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; | |
208 | 236 | std::string fn_name; | |
209 | 236 | volk_type_t type; | |
210 |
2/2✓ Branch 1 taken 966 times.
✓ Branch 2 taken 236 times.
|
1202 | for (unsigned int token_index = 0; token_index < toked.size(); ++token_index) { |
211 |
1/2✓ Branch 2 taken 966 times.
✗ Branch 3 not taken.
|
966 | std::string token = toked[token_index]; |
212 | try { | ||
213 |
3/4✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 546 times.
✓ Branch 5 taken 420 times.
|
1386 | type = volk_type_from_string(token); |
214 |
2/2✓ Branch 0 taken 234 times.
✓ Branch 1 taken 312 times.
|
546 | if (side == SIDE_NAME) |
215 | 234 | side = SIDE_OUTPUT; // if this is the first one after the name... | |
216 | |||
217 |
2/2✓ Branch 0 taken 312 times.
✓ Branch 1 taken 234 times.
|
546 | if (side == SIDE_INPUT) |
218 |
1/2✓ Branch 1 taken 312 times.
✗ Branch 2 not taken.
|
312 | inputsig.push_back(type); |
219 | else | ||
220 |
1/2✓ Branch 1 taken 234 times.
✗ Branch 2 not taken.
|
234 | outputsig.push_back(type); |
221 | 420 | } catch (...) { | |
222 |
6/8✓ Branch 1 taken 420 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 80 times.
✓ Branch 4 taken 340 times.
✓ Branch 6 taken 80 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 80 times.
✓ Branch 9 taken 340 times.
|
500 | if (token[0] == 'x' && (token.size() > 1) && |
223 |
4/8✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 80 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 80 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 80 times.
✗ Branch 9 not taken.
|
80 | (token[1] > '0' && token[1] < '9')) { // it's a multiplier |
224 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 14 times.
|
80 | if (side == SIDE_INPUT) |
225 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 66 times.
|
66 | assert(inputsig.size() > 0); |
226 | else | ||
227 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
|
14 | assert(outputsig.size() > 0); |
228 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | int multiplier = volk_lexical_cast<int>( |
229 |
1/2✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
|
160 | token.substr(1, token.size() - 1)); // will throw if invalid |
230 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 80 times.
|
164 | for (int i = 1; i < multiplier; i++) { |
231 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 14 times.
|
84 | if (side == SIDE_INPUT) |
232 |
1/2✓ Branch 2 taken 70 times.
✗ Branch 3 not taken.
|
70 | inputsig.push_back(inputsig.back()); |
233 | else | ||
234 |
1/2✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
|
14 | outputsig.push_back(outputsig.back()); |
235 | } | ||
236 |
2/2✓ Branch 0 taken 236 times.
✓ Branch 1 taken 104 times.
|
340 | } else if (side == |
237 | SIDE_INPUT) { // it's the function name, at least it better be | ||
238 | 236 | side = SIDE_NAME; | |
239 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | fn_name.append("_"); |
240 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | fn_name.append(token); |
241 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 104 times.
|
104 | } else if (side == SIDE_OUTPUT) { |
242 | ✗ | if (token != toked.back()) | |
243 | ✗ | throw; // the last token in the name is the alignment | |
244 | } | ||
245 |
1/2✓ Branch 1 taken 420 times.
✗ Branch 2 not taken.
|
420 | } |
246 | 966 | } | |
247 | // we don't need an output signature (some fn's operate on the input data, "in | ||
248 | // place"), but we do need at least one input! | ||
249 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 236 times.
|
236 | assert(inputsig.size() != 0); |
250 | 236 | } | |
251 | |||
252 | ✗ | inline void run_cast_test1(volk_fn_1arg func, | |
253 | std::vector<void*>& buffs, | ||
254 | unsigned int vlen, | ||
255 | unsigned int iter, | ||
256 | std::string arch) | ||
257 | { | ||
258 | ✗ | while (iter--) | |
259 | ✗ | func(buffs[0], vlen, arch.c_str()); | |
260 | ✗ | } | |
261 | |||
262 | 534 | inline void run_cast_test2(volk_fn_2arg func, | |
263 | std::vector<void*>& buffs, | ||
264 | unsigned int vlen, | ||
265 | unsigned int iter, | ||
266 | std::string arch) | ||
267 | { | ||
268 |
2/2✓ Branch 0 taken 534 times.
✓ Branch 1 taken 534 times.
|
1068 | while (iter--) |
269 | 534 | func(buffs[0], buffs[1], vlen, arch.c_str()); | |
270 | 534 | } | |
271 | |||
272 | 426 | inline void run_cast_test3(volk_fn_3arg func, | |
273 | std::vector<void*>& buffs, | ||
274 | unsigned int vlen, | ||
275 | unsigned int iter, | ||
276 | std::string arch) | ||
277 | { | ||
278 |
2/2✓ Branch 0 taken 426 times.
✓ Branch 1 taken 426 times.
|
852 | while (iter--) |
279 | 426 | func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); | |
280 | 426 | } | |
281 | |||
282 | 22 | inline void run_cast_test4(volk_fn_4arg func, | |
283 | std::vector<void*>& buffs, | ||
284 | unsigned int vlen, | ||
285 | unsigned int iter, | ||
286 | std::string arch) | ||
287 | { | ||
288 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 22 times.
|
44 | while (iter--) |
289 | 22 | func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); | |
290 | 22 | } | |
291 | |||
292 | 10 | inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, | |
293 | std::vector<void*>& buffs, | ||
294 | float scalar, | ||
295 | unsigned int vlen, | ||
296 | unsigned int iter, | ||
297 | std::string arch) | ||
298 | { | ||
299 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | while (iter--) |
300 | 10 | func(buffs[0], scalar, vlen, arch.c_str()); | |
301 | 10 | } | |
302 | |||
303 | 246 | inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, | |
304 | std::vector<void*>& buffs, | ||
305 | float scalar, | ||
306 | unsigned int vlen, | ||
307 | unsigned int iter, | ||
308 | std::string arch) | ||
309 | { | ||
310 |
2/2✓ Branch 0 taken 246 times.
✓ Branch 1 taken 246 times.
|
492 | while (iter--) |
311 | 246 | func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); | |
312 | 246 | } | |
313 | |||
314 | 56 | inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, | |
315 | std::vector<void*>& buffs, | ||
316 | float scalar, | ||
317 | unsigned int vlen, | ||
318 | unsigned int iter, | ||
319 | std::string arch) | ||
320 | { | ||
321 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 56 times.
|
112 | while (iter--) |
322 | 56 | func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); | |
323 | 56 | } | |
324 | |||
325 | ✗ | inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, | |
326 | std::vector<void*>& buffs, | ||
327 | lv_32fc_t scalar, | ||
328 | unsigned int vlen, | ||
329 | unsigned int iter, | ||
330 | std::string arch) | ||
331 | { | ||
332 | ✗ | while (iter--) | |
333 | ✗ | func(buffs[0], scalar, vlen, arch.c_str()); | |
334 | ✗ | } | |
335 | |||
336 | 30 | inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, | |
337 | std::vector<void*>& buffs, | ||
338 | lv_32fc_t scalar, | ||
339 | unsigned int vlen, | ||
340 | unsigned int iter, | ||
341 | std::string arch) | ||
342 | { | ||
343 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | while (iter--) |
344 | 30 | func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); | |
345 | 30 | } | |
346 | |||
347 | 10 | inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, | |
348 | std::vector<void*>& buffs, | ||
349 | lv_32fc_t scalar, | ||
350 | unsigned int vlen, | ||
351 | unsigned int iter, | ||
352 | std::string arch) | ||
353 | { | ||
354 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | while (iter--) |
355 | 10 | func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); | |
356 | 10 | } | |
357 | |||
// Compare `vlen` real values of `in2` against the reference values in `in1`.
//
// In absolute mode an element mismatches when |in1 - in2| > tol.  Otherwise
// the relative error |in1 - in2| / |in1| is tested against tol, except that a
// reference value smaller in magnitude than 1e-30 is compared by |in2| > tol.
//
// Returns true when at least one element mismatches; the first ten
// mismatches are printed to std::cout.
template <class t>
bool fcompare(t* in1, t* in2, unsigned int vlen, float tol, bool absolute_mode)
{
    bool mismatch_seen = false;
    int reports_left = 10;

    // records a mismatch and prints it while the print budget lasts
    auto flag_mismatch = [&](unsigned int idx) {
        mismatch_seen = true;
        if (reports_left-- > 0) {
            std::cout << "offset " << idx << " in1: " << t(in1[idx])
                      << " in2: " << t(in2[idx]);
            std::cout << " tolerance was: " << tol << std::endl;
        }
    };

    for (unsigned int idx = 0; idx < vlen; idx++) {
        bool out_of_tolerance;
        if (absolute_mode) {
            out_of_tolerance = fabs(in1[idx] - in2[idx]) > tol;
        } else if (fabs(in1[idx]) < 1e-30) {
            // for very small reference values round off dominates, so only
            // the magnitude of the result is checked
            out_of_tolerance = fabs(in2[idx]) > tol;
        } else {
            // the primary test: relative difference greater than tol
            out_of_tolerance = fabs(in1[idx] - in2[idx]) / fabs(in1[idx]) > tol;
        }

        if (out_of_tolerance) {
            flag_mismatch(idx);
        }
    }

    return mismatch_seen;
}
400 | |||
// Compare `vlen` complex values stored as interleaved (real, imaginary) pairs
// of `t`; `in1` is the reference.
//
// An item mismatches when any of its four components is NaN or infinite.
// Independently of that, the Euclidean distance between the two complex
// values is tested: against tol directly in absolute mode or when the
// reference magnitude is below 1e-30, otherwise after dividing by the
// reference magnitude.
//
// Returns true when at least one item mismatches; up to ten mismatch reports
// are printed to std::cout.
template <class t>
bool ccompare(t* in1, t* in2, unsigned int vlen, float tol, bool absolute_mode)
{
    bool mismatch_seen = false;
    int reports_left = 10;

    // records a mismatch for the item whose real part sits at index `re`
    auto flag_mismatch = [&](unsigned int re) {
        mismatch_seen = true;
        if (reports_left-- > 0) {
            std::cout << "offset " << re / 2 << " in1: " << in1[re] << " + "
                      << in1[re + 1] << "j in2: " << in2[re] << " + " << in2[re + 1]
                      << "j";
            std::cout << " tolerance was: " << tol << std::endl;
        }
    };

    for (unsigned int re = 0; re < 2 * vlen; re += 2) {
        const unsigned int im = re + 1;

        const bool has_nan = std::isnan(in1[re]) || std::isnan(in1[im]) ||
                             std::isnan(in2[re]) || std::isnan(in2[im]);
        const bool has_inf = std::isinf(in1[re]) || std::isinf(in1[im]) ||
                             std::isinf(in2[re]) || std::isinf(in2[im]);
        if (has_nan || has_inf) {
            flag_mismatch(re);
        }

        t diff[2] = { in1[re] - in2[re], in1[im] - in2[im] };
        t err = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]);
        t norm = std::sqrt(in1[re] * in1[re] + in1[im] * in1[im]);

        bool out_of_tolerance;
        if (absolute_mode || norm < 1e-30) {
            // absolute comparison; also used for tiny references, where round
            // off would dominate a relative error
            out_of_tolerance = err > tol;
        } else {
            // the primary test: relative difference greater than tol
            out_of_tolerance = (err / norm) > tol;
        }

        if (out_of_tolerance) {
            flag_mismatch(re);
        }
    }

    return mismatch_seen;
}
461 | |||
// Compare `vlen` integer values of `in2` against the reference values in
// `in1`.  An element mismatches when the absolute difference of the two
// values is greater than `tol`.
//
// The difference is computed at full width: both operands are ordered first
// and then subtracted as unsigned 64-bit values, which yields the exact
// distance for every signed or unsigned integer type of up to 64 bits and
// cannot overflow.  `t` is expected to be an integer type.
//
// `absolute_mode` is accepted for signature parity with the floating point
// comparators but is not consulted.
//
// Returns true when at least one element mismatches; the first ten
// mismatches are printed to std::cout.
template <class t>
bool icompare(t* in1, t* in2, unsigned int vlen, unsigned int tol, bool absolute_mode)
{
    (void)absolute_mode;

    bool fail = false;
    int print_max_errs = 10;
    for (unsigned int i = 0; i < vlen; i++) {
        const t expected = in1[i];
        const t actual = in2[i];
        const t larger = (expected > actual) ? expected : actual;
        const t smaller = (expected > actual) ? actual : expected;
        // modulo-2^64 subtraction of the ordered pair is the true distance
        const unsigned long long distance = static_cast<unsigned long long>(larger) -
                                            static_cast<unsigned long long>(smaller);

        if (distance > tol) {
            fail = true;
            if (print_max_errs-- > 0) {
                // unary plus promotes 8-bit types so they print as numbers,
                // and leaves wider types at their full width
                std::cout << "offset " << i << " in1: " << +expected
                          << " in2: " << +actual;
                std::cout << " tolerance was: " << tol << std::endl;
            }
        }
    }

    return fail;
}
481 | |||
482 | class volk_qa_aligned_mem_pool | ||
483 | { | ||
484 | public: | ||
485 | 3514 | void* get_new(size_t size) | |
486 | { | ||
487 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | size_t alignment = volk_get_alignment(); |
488 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | void* ptr = volk_malloc(size, alignment); |
489 | 3514 | memset(ptr, 0x00, size); | |
490 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | _mems.push_back(ptr); |
491 | 3514 | return ptr; | |
492 | } | ||
493 | 236 | ~volk_qa_aligned_mem_pool() | |
494 | { | ||
495 |
2/2✓ Branch 1 taken 3514 times.
✓ Branch 2 taken 236 times.
|
3750 | for (unsigned int ii = 0; ii < _mems.size(); ++ii) { |
496 | 3514 | volk_free(_mems[ii]); | |
497 | } | ||
498 | 236 | } | |
499 | |||
500 | private: | ||
501 | std::vector<void*> _mems; | ||
502 | }; | ||
503 | |||
504 | 236 | bool run_volk_tests(volk_func_desc_t desc, | |
505 | void (*manual_func)(), | ||
506 | std::string name, | ||
507 | volk_test_params_t test_params, | ||
508 | std::vector<volk_test_results_t>* results, | ||
509 | std::string puppet_master_name) | ||
510 | { | ||
511 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 8 taken 236 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 236 times.
✗ Branch 12 not taken.
|
472 | return run_volk_tests(desc, |
512 | manual_func, | ||
513 | name, | ||
514 | test_params.tol(), | ||
515 | test_params.scalar(), | ||
516 | test_params.vlen(), | ||
517 | test_params.iter(), | ||
518 | results, | ||
519 | puppet_master_name, | ||
520 | 236 | test_params.absolute_mode(), | |
521 | 708 | test_params.benchmark_mode()); | |
522 | } | ||
523 | |||
524 | 236 | bool run_volk_tests(volk_func_desc_t desc, | |
525 | void (*manual_func)(), | ||
526 | std::string name, | ||
527 | float tol, | ||
528 | lv_32fc_t scalar, | ||
529 | unsigned int vlen, | ||
530 | unsigned int iter, | ||
531 | std::vector<volk_test_results_t>* results, | ||
532 | std::string puppet_master_name, | ||
533 | bool absolute_mode, | ||
534 | bool benchmark_mode) | ||
535 | { | ||
536 | // Initialize this entry in results vector | ||
537 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->push_back(volk_test_results_t()); |
538 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().name = name; |
539 | 236 | results->back().vlen = vlen; | |
540 | 236 | results->back().iter = iter; | |
541 |
7/14✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 236 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 236 times.
✗ Branch 14 not taken.
✓ Branch 16 taken 236 times.
✗ Branch 17 not taken.
✓ Branch 19 taken 236 times.
✗ Branch 20 not taken.
|
236 | std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" |
542 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | << std::endl; |
543 | |||
544 | // vlen_twiddle will increase vlen for malloc and data generation | ||
545 | // but kernels will still be called with the user provided vlen. | ||
546 | // This is useful for causing errors in kernels that do bad reads | ||
547 | 236 | const unsigned int vlen_twiddle = 5; | |
548 | 236 | vlen = vlen + vlen_twiddle; | |
549 | |||
550 | 236 | const float tol_f = tol; | |
551 | 236 | const unsigned int tol_i = static_cast<const unsigned int>(tol); | |
552 | |||
553 | // first let's get a list of available architectures for the test | ||
554 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::vector<std::string> arch_list = get_arch_list(desc); |
555 | |||
556 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 236 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 236 times.
|
236 | if ((!benchmark_mode) && (arch_list.size() < 2)) { |
557 | ✗ | std::cout << "no architectures to test" << std::endl; | |
558 | ✗ | return false; | |
559 | } | ||
560 | |||
561 | // something that can hang onto memory and cleanup when this function exits | ||
562 | 236 | volk_qa_aligned_mem_pool mem_pool; | |
563 | |||
564 | // now we have to get a function signature by parsing the name | ||
565 | 236 | std::vector<volk_type_t> inputsig, outputsig; | |
566 | try { | ||
567 |
2/4✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
|
236 | get_signatures_from_name(inputsig, outputsig, name); |
568 | ✗ | } catch (std::exception& error) { | |
569 | ✗ | std::cerr << "Error: unable to get function signature from kernel name" | |
570 | ✗ | << std::endl; | |
571 | ✗ | std::cerr << " - " << name << std::endl; | |
572 | ✗ | return false; | |
573 | ✗ | } | |
574 | |||
575 | // pull the input scalars into their own vector | ||
576 | 236 | std::vector<volk_type_t> inputsc; | |
577 |
2/2✓ Branch 1 taken 382 times.
✓ Branch 2 taken 236 times.
|
618 | for (size_t i = 0; i < inputsig.size(); i++) { |
578 |
2/2✓ Branch 1 taken 62 times.
✓ Branch 2 taken 320 times.
|
382 | if (inputsig[i].is_scalar) { |
579 |
1/2✓ Branch 2 taken 62 times.
✗ Branch 3 not taken.
|
62 | inputsc.push_back(inputsig[i]); |
580 |
1/2✓ Branch 4 taken 62 times.
✗ Branch 5 not taken.
|
62 | inputsig.erase(inputsig.begin() + i); |
581 | 62 | i -= 1; | |
582 | } | ||
583 | } | ||
584 | 236 | std::vector<void*> inbuffs; | |
585 |
2/2✓ Branch 1 taken 320 times.
✓ Branch 2 taken 236 times.
|
556 | for (unsigned int inputsig_index = 0; inputsig_index < inputsig.size(); |
586 | ++inputsig_index) { | ||
587 |
1/2✓ Branch 2 taken 320 times.
✗ Branch 3 not taken.
|
320 | volk_type_t sig = inputsig[inputsig_index]; |
588 |
1/2✓ Branch 0 taken 320 times.
✗ Branch 1 not taken.
|
320 | if (!sig.is_scalar) // we don't make buffers for scalars |
589 | 320 | inbuffs.push_back( | |
590 |
4/6✓ Branch 0 taken 130 times.
✓ Branch 1 taken 190 times.
✓ Branch 3 taken 320 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 320 times.
✗ Branch 7 not taken.
|
320 | mem_pool.get_new(vlen * sig.size * (sig.is_complex ? 2 : 1))); |
591 | 320 | } | |
592 |
2/2✓ Branch 1 taken 320 times.
✓ Branch 2 taken 236 times.
|
556 | for (size_t i = 0; i < inbuffs.size(); i++) { |
593 |
2/4✓ Branch 2 taken 320 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 320 times.
✗ Branch 7 not taken.
|
320 | load_random_data(inbuffs[i], inputsig[i], vlen); |
594 | } | ||
595 | |||
596 | // ok let's make a vector of vector of void buffers, which holds the input/output | ||
597 | // vectors for each arch | ||
598 | 236 | std::vector<std::vector<void*>> test_data; | |
599 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
600 | 1334 | std::vector<void*> arch_buffs; | |
601 |
2/2✓ Branch 1 taken 1396 times.
✓ Branch 2 taken 1334 times.
|
2730 | for (size_t j = 0; j < outputsig.size(); j++) { |
602 |
2/4✓ Branch 2 taken 1396 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1396 times.
✗ Branch 6 not taken.
|
2792 | arch_buffs.push_back(mem_pool.get_new(vlen * outputsig[j].size * |
603 |
2/2✓ Branch 1 taken 272 times.
✓ Branch 2 taken 1124 times.
|
1396 | (outputsig[j].is_complex ? 2 : 1))); |
604 | } | ||
605 |
2/2✓ Branch 1 taken 1798 times.
✓ Branch 2 taken 1334 times.
|
3132 | for (size_t j = 0; j < inputsig.size(); j++) { |
606 |
1/2✓ Branch 2 taken 1798 times.
✗ Branch 3 not taken.
|
3596 | void* arch_inbuff = mem_pool.get_new(vlen * inputsig[j].size * |
607 |
2/2✓ Branch 1 taken 718 times.
✓ Branch 2 taken 1080 times.
|
1798 | (inputsig[j].is_complex ? 2 : 1)); |
608 | 3596 | memcpy(arch_inbuff, | |
609 | 1798 | inbuffs[j], | |
610 |
2/2✓ Branch 2 taken 718 times.
✓ Branch 3 taken 1080 times.
|
1798 | vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1)); |
611 |
1/2✓ Branch 1 taken 1798 times.
✗ Branch 2 not taken.
|
1798 | arch_buffs.push_back(arch_inbuff); |
612 | } | ||
613 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | test_data.push_back(arch_buffs); |
614 | 1334 | } | |
615 | |||
616 | 236 | std::vector<volk_type_t> both_sigs; | |
617 |
1/2✓ Branch 5 taken 236 times.
✗ Branch 6 not taken.
|
236 | both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); |
618 |
1/2✓ Branch 5 taken 236 times.
✗ Branch 6 not taken.
|
236 | both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); |
619 | |||
620 | // now run the test | ||
621 | 236 | vlen = vlen - vlen_twiddle; | |
622 | 236 | std::chrono::time_point<std::chrono::system_clock> start, end; | |
623 | 236 | std::vector<double> profile_times; | |
624 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
625 | 1334 | start = std::chrono::system_clock::now(); | |
626 | |||
627 |
4/5✓ Branch 1 taken 10 times.
✓ Branch 2 taken 810 times.
✓ Branch 3 taken 492 times.
✓ Branch 4 taken 22 times.
✗ Branch 5 not taken.
|
1334 | switch (both_sigs.size()) { |
628 | 10 | case 1: | |
629 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
|
10 | if (inputsc.size() == 0) { |
630 | ✗ | run_cast_test1( | |
631 | ✗ | (volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
632 |
3/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 10 times.
✗ Branch 7 not taken.
|
10 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
633 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
|
10 | if (inputsc[0].is_complex) { |
634 | ✗ | run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), | |
635 | ✗ | test_data[i], | |
636 | scalar, | ||
637 | vlen, | ||
638 | iter, | ||
639 | ✗ | arch_list[i]); | |
640 | } else { | ||
641 |
2/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
|
20 | run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), |
642 | 10 | test_data[i], | |
643 | scalar.real(), | ||
644 | vlen, | ||
645 | iter, | ||
646 | 10 | arch_list[i]); | |
647 | } | ||
648 | } else | ||
649 | ✗ | throw "unsupported 1 arg function >1 scalars"; | |
650 | 10 | break; | |
651 | 810 | case 2: | |
652 |
2/2✓ Branch 1 taken 534 times.
✓ Branch 2 taken 276 times.
|
810 | if (inputsc.size() == 0) { |
653 |
2/4✓ Branch 1 taken 534 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 534 times.
✗ Branch 5 not taken.
|
1068 | run_cast_test2( |
654 | 1068 | (volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
655 |
3/6✓ Branch 1 taken 276 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 276 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 276 times.
✗ Branch 7 not taken.
|
276 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
656 |
2/2✓ Branch 1 taken 30 times.
✓ Branch 2 taken 246 times.
|
276 | if (inputsc[0].is_complex) { |
657 |
2/4✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 30 times.
✗ Branch 5 not taken.
|
60 | run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), |
658 | 30 | test_data[i], | |
659 | scalar, | ||
660 | vlen, | ||
661 | iter, | ||
662 | 30 | arch_list[i]); | |
663 | } else { | ||
664 |
2/4✓ Branch 1 taken 246 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 246 times.
✗ Branch 6 not taken.
|
492 | run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), |
665 | 246 | test_data[i], | |
666 | scalar.real(), | ||
667 | vlen, | ||
668 | iter, | ||
669 | 246 | arch_list[i]); | |
670 | } | ||
671 | } else | ||
672 | ✗ | throw "unsupported 2 arg function >1 scalars"; | |
673 | 810 | break; | |
674 | 492 | case 3: | |
675 |
2/2✓ Branch 1 taken 426 times.
✓ Branch 2 taken 66 times.
|
492 | if (inputsc.size() == 0) { |
676 |
2/4✓ Branch 1 taken 426 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 426 times.
✗ Branch 5 not taken.
|
852 | run_cast_test3( |
677 | 852 | (volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
678 |
3/6✓ Branch 1 taken 66 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 66 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 66 times.
✗ Branch 7 not taken.
|
66 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
679 |
2/2✓ Branch 1 taken 10 times.
✓ Branch 2 taken 56 times.
|
66 | if (inputsc[0].is_complex) { |
680 |
2/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
|
20 | run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), |
681 | 10 | test_data[i], | |
682 | scalar, | ||
683 | vlen, | ||
684 | iter, | ||
685 | 10 | arch_list[i]); | |
686 | } else { | ||
687 |
2/4✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 56 times.
✗ Branch 6 not taken.
|
112 | run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), |
688 | 56 | test_data[i], | |
689 | scalar.real(), | ||
690 | vlen, | ||
691 | iter, | ||
692 | 56 | arch_list[i]); | |
693 | } | ||
694 | } else | ||
695 | ✗ | throw "unsupported 3 arg function >1 scalars"; | |
696 | 492 | break; | |
697 | 22 | case 4: | |
698 |
2/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22 times.
✗ Branch 5 not taken.
|
44 | run_cast_test4( |
699 | 44 | (volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
700 | 22 | break; | |
701 | ✗ | default: | |
702 | ✗ | throw "no function handler for this signature"; | |
703 | break; | ||
704 | } | ||
705 | |||
706 | 1334 | end = std::chrono::system_clock::now(); | |
707 |
2/4✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1334 times.
✗ Branch 5 not taken.
|
1334 | std::chrono::duration<double> elapsed_seconds = end - start; |
708 | 1334 | double arch_time = 1000.0 * elapsed_seconds.count(); | |
709 |
5/10✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1334 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 1334 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 1334 times.
✗ Branch 12 not taken.
✓ Branch 14 taken 1334 times.
✗ Branch 15 not taken.
|
1334 | std::cout << arch_list[i] << " completed in " << arch_time << " ms" << std::endl; |
710 | 2668 | volk_test_time_t result; | |
711 |
1/2✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
|
1334 | result.name = arch_list[i]; |
712 | 1334 | result.time = arch_time; | |
713 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | result.units = "ms"; |
714 | 1334 | result.pass = true; | |
715 |
2/4✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1334 times.
✗ Branch 6 not taken.
|
1334 | results->back().results[result.name] = result; |
716 | |||
717 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | profile_times.push_back(arch_time); |
718 | } | ||
719 | |||
720 | // and now compare each output to the generic output | ||
721 | // first we have to know which output is the generic one, they aren't in order... | ||
722 | 236 | size_t generic_offset = 0; | |
723 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
724 |
3/4✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 232 times.
✓ Branch 5 taken 1102 times.
|
1334 | if (arch_list[i] == "generic") { |
725 | 232 | generic_offset = i; | |
726 | } | ||
727 | } | ||
728 | |||
729 | // Just in case a kernel wrote to OOB memory, use the twiddled vlen | ||
730 | 236 | vlen = vlen + vlen_twiddle; | |
731 | bool fail; | ||
732 | 236 | bool fail_global = false; | |
733 | 472 | std::vector<bool> arch_results; | |
734 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
735 | 1334 | fail = false; | |
736 |
2/2✓ Branch 0 taken 1098 times.
✓ Branch 1 taken 236 times.
|
1334 | if (i != generic_offset) { |
737 |
2/2✓ Branch 1 taken 2626 times.
✓ Branch 2 taken 1098 times.
|
3724 | for (size_t j = 0; j < both_sigs.size(); j++) { |
738 |
2/2✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 738 times.
|
2626 | if (both_sigs[j].is_float) { |
739 |
2/2✓ Branch 1 taken 146 times.
✓ Branch 2 taken 1742 times.
|
1888 | if (both_sigs[j].size == 8) { |
740 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 146 times.
|
146 | if (both_sigs[j].is_complex) { |
741 | ✗ | fail = ccompare((double*)test_data[generic_offset][j], | |
742 | ✗ | (double*)test_data[i][j], | |
743 | vlen, | ||
744 | tol_f, | ||
745 | absolute_mode); | ||
746 | } else { | ||
747 |
1/2✓ Branch 3 taken 146 times.
✗ Branch 4 not taken.
|
146 | fail = fcompare((double*)test_data[generic_offset][j], |
748 | 146 | (double*)test_data[i][j], | |
749 | vlen, | ||
750 | tol_f, | ||
751 | absolute_mode); | ||
752 | } | ||
753 | } else { | ||
754 |
2/2✓ Branch 1 taken 608 times.
✓ Branch 2 taken 1134 times.
|
1742 | if (both_sigs[j].is_complex) { |
755 |
1/2✓ Branch 3 taken 608 times.
✗ Branch 4 not taken.
|
608 | fail = ccompare((float*)test_data[generic_offset][j], |
756 | 608 | (float*)test_data[i][j], | |
757 | vlen, | ||
758 | tol_f, | ||
759 | absolute_mode); | ||
760 | } else { | ||
761 |
1/2✓ Branch 3 taken 1134 times.
✗ Branch 4 not taken.
|
1134 | fail = fcompare((float*)test_data[generic_offset][j], |
762 | 1134 | (float*)test_data[i][j], | |
763 | vlen, | ||
764 | tol_f, | ||
765 | absolute_mode); | ||
766 | } | ||
767 | } | ||
768 | } else { | ||
769 | // i could replace this whole switch statement with a memcmp if i | ||
770 | // wasn't interested in printing the outputs where they differ | ||
771 |
4/5✓ Branch 1 taken 28 times.
✓ Branch 2 taken 170 times.
✓ Branch 3 taken 348 times.
✓ Branch 4 taken 192 times.
✗ Branch 5 not taken.
|
738 | switch (both_sigs[j].size) { |
772 | 28 | case 8: | |
773 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | if (both_sigs[j].is_signed) { |
774 | ✗ | fail = icompare((int64_t*)test_data[generic_offset][j], | |
775 | ✗ | (int64_t*)test_data[i][j], | |
776 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
777 | tol_i, | ||
778 | absolute_mode); | ||
779 | } else { | ||
780 |
1/2✓ Branch 3 taken 28 times.
✗ Branch 4 not taken.
|
28 | fail = icompare((uint64_t*)test_data[generic_offset][j], |
781 | 28 | (uint64_t*)test_data[i][j], | |
782 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
783 | tol_i, | ||
784 | absolute_mode); | ||
785 | } | ||
786 | 28 | break; | |
787 | 170 | case 4: | |
788 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 170 times.
|
170 | if (both_sigs[j].is_complex) { |
789 | ✗ | if (both_sigs[j].is_signed) { | |
790 | ✗ | fail = icompare((int16_t*)test_data[generic_offset][j], | |
791 | ✗ | (int16_t*)test_data[i][j], | |
792 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
793 | tol_i, | ||
794 | absolute_mode); | ||
795 | } else { | ||
796 | ✗ | fail = icompare((uint16_t*)test_data[generic_offset][j], | |
797 | ✗ | (uint16_t*)test_data[i][j], | |
798 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
799 | tol_i, | ||
800 | absolute_mode); | ||
801 | } | ||
802 | } else { | ||
803 |
2/2✓ Branch 1 taken 82 times.
✓ Branch 2 taken 88 times.
|
170 | if (both_sigs[j].is_signed) { |
804 |
1/2✓ Branch 3 taken 82 times.
✗ Branch 4 not taken.
|
82 | fail = icompare((int32_t*)test_data[generic_offset][j], |
805 | 82 | (int32_t*)test_data[i][j], | |
806 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 82 times.
|
82 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
807 | tol_i, | ||
808 | absolute_mode); | ||
809 | } else { | ||
810 |
1/2✓ Branch 3 taken 88 times.
✗ Branch 4 not taken.
|
88 | fail = icompare((uint32_t*)test_data[generic_offset][j], |
811 | 88 | (uint32_t*)test_data[i][j], | |
812 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 88 times.
|
88 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
813 | tol_i, | ||
814 | absolute_mode); | ||
815 | } | ||
816 | } | ||
817 | 170 | break; | |
818 | 348 | case 2: | |
819 |
2/2✓ Branch 1 taken 296 times.
✓ Branch 2 taken 52 times.
|
348 | if (both_sigs[j].is_signed) { |
820 |
1/2✓ Branch 3 taken 296 times.
✗ Branch 4 not taken.
|
296 | fail = icompare((int16_t*)test_data[generic_offset][j], |
821 | 296 | (int16_t*)test_data[i][j], | |
822 |
2/2✓ Branch 1 taken 140 times.
✓ Branch 2 taken 156 times.
|
296 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
823 | tol_i, | ||
824 | absolute_mode); | ||
825 | } else { | ||
826 |
1/2✓ Branch 3 taken 52 times.
✗ Branch 4 not taken.
|
52 | fail = icompare((uint16_t*)test_data[generic_offset][j], |
827 | 52 | (uint16_t*)test_data[i][j], | |
828 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 52 times.
|
52 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
829 | tol_i, | ||
830 | absolute_mode); | ||
831 | } | ||
832 | 348 | break; | |
833 | 192 | case 1: | |
834 |
2/2✓ Branch 1 taken 136 times.
✓ Branch 2 taken 56 times.
|
192 | if (both_sigs[j].is_signed) { |
835 |
1/2✓ Branch 3 taken 136 times.
✗ Branch 4 not taken.
|
136 | fail = icompare((int8_t*)test_data[generic_offset][j], |
836 | 136 | (int8_t*)test_data[i][j], | |
837 |
2/2✓ Branch 1 taken 64 times.
✓ Branch 2 taken 72 times.
|
136 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
838 | tol_i, | ||
839 | absolute_mode); | ||
840 | } else { | ||
841 |
1/2✓ Branch 3 taken 56 times.
✗ Branch 4 not taken.
|
56 | fail = icompare((uint8_t*)test_data[generic_offset][j], |
842 | 56 | (uint8_t*)test_data[i][j], | |
843 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 56 times.
|
56 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
844 | tol_i, | ||
845 | absolute_mode); | ||
846 | } | ||
847 | 192 | break; | |
848 | ✗ | default: | |
849 | ✗ | fail = 1; | |
850 | } | ||
851 | } | ||
852 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2626 times.
|
2626 | if (fail) { |
853 | ✗ | volk_test_time_t* result = &results->back().results[arch_list[i]]; | |
854 | ✗ | result->pass = false; | |
855 | ✗ | fail_global = true; | |
856 | ✗ | std::cout << name << ": fail on arch " << arch_list[i] << std::endl; | |
857 | } | ||
858 | } | ||
859 | } | ||
860 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | arch_results.push_back(!fail); |
861 | } | ||
862 | |||
863 | 236 | double best_time_a = std::numeric_limits<double>::max(); | |
864 | 236 | double best_time_u = std::numeric_limits<double>::max(); | |
865 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
708 | std::string best_arch_a = "generic"; |
866 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::string best_arch_u = "generic"; |
867 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
868 |
6/8✓ Branch 1 taken 875 times.
✓ Branch 2 taken 459 times.
✓ Branch 4 taken 875 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 875 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 465 times.
✓ Branch 10 taken 869 times.
|
2209 | if ((profile_times[i] < best_time_u) && arch_results[i] && |
869 |
2/2✓ Branch 0 taken 465 times.
✓ Branch 1 taken 410 times.
|
875 | desc.impl_alignment[i] == 0) { |
870 | 465 | best_time_u = profile_times[i]; | |
871 |
1/2✓ Branch 2 taken 465 times.
✗ Branch 3 not taken.
|
465 | best_arch_u = arch_list[i]; |
872 | } | ||
873 |
6/8✓ Branch 1 taken 557 times.
✓ Branch 2 taken 777 times.
✓ Branch 4 taken 557 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 557 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 557 times.
✓ Branch 10 taken 777 times.
|
1334 | if ((profile_times[i] < best_time_a) && arch_results[i]) { |
874 | 557 | best_time_a = profile_times[i]; | |
875 |
1/2✓ Branch 2 taken 557 times.
✗ Branch 3 not taken.
|
557 | best_arch_a = arch_list[i]; |
876 | } | ||
877 | } | ||
878 | |||
879 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
|
236 | std::cout << "Best aligned arch: " << best_arch_a << std::endl; |
880 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
|
236 | std::cout << "Best unaligned arch: " << best_arch_u << std::endl; |
881 | |||
882 |
3/4✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 210 times.
✓ Branch 4 taken 26 times.
|
236 | if (puppet_master_name == "NULL") { |
883 |
1/2✓ Branch 2 taken 210 times.
✗ Branch 3 not taken.
|
210 | results->back().config_name = name; |
884 | } else { | ||
885 |
1/2✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
26 | results->back().config_name = puppet_master_name; |
886 | } | ||
887 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().best_arch_a = best_arch_a; |
888 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().best_arch_u = best_arch_u; |
889 | |||
890 | 236 | return fail_global; | |
891 | 236 | } | |
892 |