| Line | Branch | Exec | Source |
|---|---|---|---|
| 1 | /* -*- c++ -*- */ | ||
| 2 | /* | ||
| 3 | * Copyright 2011 - 2020, 2022 Free Software Foundation, Inc. | ||
| 4 | * | ||
| 5 | * This file is part of VOLK | ||
| 6 | * | ||
| 7 | * SPDX-License-Identifier: LGPL-3.0-or-later | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include "qa_utils.h" | ||
| 11 | #include <volk/volk.h> | ||
| 12 | |||
| 13 | #include <volk/volk.h> // for volk_func_desc_t | ||
| 14 | #include <volk/volk_malloc.h> // for volk_free, volk_m... | ||
| 15 | |||
| 16 | #include <assert.h> // for assert | ||
| 17 | #include <stdint.h> // for uint16_t, uint64_t | ||
| 18 | #include <sys/time.h> // for CLOCKS_PER_SEC | ||
| 19 | #include <sys/types.h> // for int16_t, int32_t | ||
| 20 | #include <chrono> | ||
| 21 | #include <cmath> // for sqrt, fabs, abs | ||
| 22 | #include <cstring> // for memcpy, memset | ||
| 23 | #include <ctime> // for clock | ||
| 24 | #include <fstream> // for operator<<, basic... | ||
| 25 | #include <iostream> // for cout, cerr | ||
| 26 | #include <limits> // for numeric_limits | ||
| 27 | #include <map> // for map, map<>::mappe... | ||
| 28 | #include <random> | ||
| 29 | #include <vector> // for vector, _Bit_refe... | ||
| 30 | |||
// Fills the first `n` elements of `buf`, interpreted as an array of T, with
// values drawn from std::uniform_real_distribution<T>(-1, 1) using
// `rnd_engine`. One value is drawn per element, in increasing address order.
template <typename T>
void random_floats(void* buf, unsigned int n, std::default_random_engine& rnd_engine)
{
    std::uniform_real_distribution<T> unit_range(T(-1), T(1));
    T* cursor = static_cast<T*>(buf);
    T* const stop = cursor + n;
    while (cursor != stop) {
        *cursor = unit_range(rnd_engine);
        ++cursor;
    }
}
| 40 | |||
| 41 | 320 | void load_random_data(void* data, volk_type_t type, unsigned int n) | |
| 42 | { | ||
| 43 |
1/2✓ Branch 1 taken 320 times.
✗ Branch 2 not taken.
|
320 | std::random_device rnd_device; |
| 44 |
2/4✓ Branch 1 taken 320 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 320 times.
✗ Branch 5 not taken.
|
320 | std::default_random_engine rnd_engine(rnd_device()); |
| 45 |
2/2✓ Branch 0 taken 130 times.
✓ Branch 1 taken 190 times.
|
320 | if (type.is_complex) |
| 46 | 130 | n *= 2; | |
| 47 |
2/2✓ Branch 0 taken 236 times.
✓ Branch 1 taken 84 times.
|
320 | if (type.is_float) { |
| 48 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 214 times.
|
236 | if (type.size == 8) { |
| 49 |
1/2✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
|
22 | random_floats<double>(data, n, rnd_engine); |
| 50 | } else { | ||
| 51 |
1/2✓ Branch 1 taken 214 times.
✗ Branch 2 not taken.
|
214 | random_floats<float>(data, n, rnd_engine); |
| 52 | } | ||
| 53 | } else { | ||
| 54 | 84 | float int_max = float(uint64_t(2) << (type.size * 8)); | |
| 55 |
2/2✓ Branch 0 taken 62 times.
✓ Branch 1 taken 22 times.
|
84 | if (type.is_signed) |
| 56 | 62 | int_max /= 2.0; | |
| 57 |
1/2✓ Branch 1 taken 84 times.
✗ Branch 2 not taken.
|
84 | std::uniform_real_distribution<float> uniform_dist(-int_max, int_max); |
| 58 |
2/2✓ Branch 0 taken 16515576 times.
✓ Branch 1 taken 84 times.
|
16515660 | for (unsigned int i = 0; i < n; i++) { |
| 59 |
1/2✓ Branch 1 taken 16515576 times.
✗ Branch 2 not taken.
|
16515576 | float scaled_rand = uniform_dist(rnd_engine); |
| 60 | // man i really don't know how to do this in a more clever way, you have to | ||
| 61 | // cast down at some point | ||
| 62 |
4/5✓ Branch 0 taken 524304 times.
✓ Branch 1 taken 2097216 times.
✓ Branch 2 taken 7340256 times.
✓ Branch 3 taken 6553800 times.
✗ Branch 4 not taken.
|
16515576 | switch (type.size) { |
| 63 | 524304 | case 8: | |
| 64 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 524304 times.
|
524304 | if (type.is_signed) |
| 65 | ✗ | ((int64_t*)data)[i] = (int64_t)scaled_rand; | |
| 66 | else | ||
| 67 | 524304 | ((uint64_t*)data)[i] = (uint64_t)scaled_rand; | |
| 68 | 524304 | break; | |
| 69 | 2097216 | case 4: | |
| 70 |
2/2✓ Branch 0 taken 1310760 times.
✓ Branch 1 taken 786456 times.
|
2097216 | if (type.is_signed) |
| 71 | 1310760 | ((int32_t*)data)[i] = (int32_t)scaled_rand; | |
| 72 | else | ||
| 73 | 786456 | ((uint32_t*)data)[i] = (uint32_t)scaled_rand; | |
| 74 | 2097216 | break; | |
| 75 | 7340256 | case 2: | |
| 76 |
2/2✓ Branch 0 taken 7078104 times.
✓ Branch 1 taken 262152 times.
|
7340256 | if (type.is_signed) |
| 77 | 7078104 | ((int16_t*)data)[i] = (int16_t)((int16_t)scaled_rand % 8); | |
| 78 | else | ||
| 79 | 262152 | ((uint16_t*)data)[i] = (uint16_t)((int16_t)scaled_rand % 8); | |
| 80 | 7340256 | break; | |
| 81 | 6553800 | case 1: | |
| 82 |
2/2✓ Branch 0 taken 5243040 times.
✓ Branch 1 taken 1310760 times.
|
6553800 | if (type.is_signed) |
| 83 | 5243040 | ((int8_t*)data)[i] = (int8_t)scaled_rand; | |
| 84 | else | ||
| 85 | 1310760 | ((uint8_t*)data)[i] = (uint8_t)scaled_rand; | |
| 86 | 6553800 | break; | |
| 87 | ✗ | default: | |
| 88 | ✗ | throw "load_random_data: no support for data size > 8 or < 1"; // no | |
| 89 | // shenanigans | ||
| 90 | // here | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | 320 | } | |
| 95 | |||
| 96 | 236 | static std::vector<std::string> get_arch_list(volk_func_desc_t desc) | |
| 97 | { | ||
| 98 | 236 | std::vector<std::string> archlist; | |
| 99 | |||
| 100 |
2/2✓ Branch 0 taken 1334 times.
✓ Branch 1 taken 236 times.
|
1570 | for (size_t i = 0; i < desc.n_impls; i++) { |
| 101 |
2/4✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1334 times.
✗ Branch 5 not taken.
|
2668 | archlist.push_back(std::string(desc.impl_names[i])); |
| 102 | } | ||
| 103 | |||
| 104 | 236 | return archlist; | |
| 105 | ✗ | } | |
| 106 | |||
// Converts a string made only of the decimal digits '0'..'9' into a T using
// stream extraction.
//
// Throws a string literal (const char*) when:
//  - any character is not a decimal digit, or
//  - the extraction fails (empty string, or a value that does not fit in T).
// Previously a failed extraction was not checked, so an empty string returned
// an uninitialized value and an overflowing one was silently accepted.
template <typename T>
T volk_lexical_cast(const std::string& str)
{
    for (const char ch : str) {
        if (ch < '0' || ch > '9') {
            throw "not all numbers!";
        }
    }
    T var{};
    std::istringstream iss(str);
    iss >> var;
    // deal with any error bits that may have been set on the stream
    if (iss.fail()) {
        throw "not a representable number!";
    }
    return var;
}
| 122 | |||
| 123 | 966 | volk_type_t volk_type_from_string(std::string name) | |
| 124 | { | ||
| 125 | 966 | volk_type_t type; | |
| 126 | 966 | type.is_float = false; | |
| 127 | 966 | type.is_scalar = false; | |
| 128 | 966 | type.is_complex = false; | |
| 129 | 966 | type.is_signed = false; | |
| 130 | 966 | type.size = 0; | |
| 131 |
1/2✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
|
966 | type.str = name; |
| 132 | |||
| 133 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 966 times.
|
966 | if (name.size() < 2) { |
| 134 | ✗ | throw std::string("name too short to be a datatype"); | |
| 135 | } | ||
| 136 | |||
| 137 | // is it a scalar? | ||
| 138 |
3/4✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 96 times.
✓ Branch 4 taken 870 times.
|
966 | if (name[0] == 's') { |
| 139 | 96 | type.is_scalar = true; | |
| 140 |
1/2✓ Branch 2 taken 96 times.
✗ Branch 3 not taken.
|
96 | name = name.substr(1, name.size() - 1); |
| 141 | } | ||
| 142 | |||
| 143 | // get the data size | ||
| 144 | 966 | size_t last_size_pos = name.find_last_of("0123456789"); | |
| 145 |
2/2✓ Branch 0 taken 332 times.
✓ Branch 1 taken 634 times.
|
966 | if (last_size_pos == std::string::npos) { |
| 146 |
3/6✓ Branch 2 taken 332 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 332 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 332 times.
✗ Branch 9 not taken.
|
996 | throw std::string("no size spec in type ").append(name); |
| 147 | } | ||
| 148 | // will throw if malformed | ||
| 149 |
3/4✓ Branch 1 taken 634 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 546 times.
✓ Branch 5 taken 88 times.
|
722 | int size = volk_lexical_cast<int>(name.substr(0, last_size_pos + 1)); |
| 150 | |||
| 151 |
3/6✓ Branch 0 taken 546 times.
✗ Branch 1 not taken.
✓ Branch 2 taken 546 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 546 times.
✗ Branch 5 not taken.
|
546 | assert(((size % 8) == 0) && (size <= 64) && (size != 0)); |
| 152 | 546 | type.size = size / 8; // in bytes | |
| 153 | |||
| 154 |
2/2✓ Branch 1 taken 704 times.
✓ Branch 2 taken 546 times.
|
1250 | for (size_t i = last_size_pos + 1; i < name.size(); i++) { |
| 155 |
5/7✓ Branch 1 taken 704 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 396 times.
✓ Branch 4 taken 98 times.
✓ Branch 5 taken 158 times.
✓ Branch 6 taken 52 times.
✗ Branch 7 not taken.
|
704 | switch (name[i]) { |
| 156 | 396 | case 'f': | |
| 157 | 396 | type.is_float = true; | |
| 158 | 396 | break; | |
| 159 | 98 | case 'i': | |
| 160 | 98 | type.is_signed = true; | |
| 161 | 98 | break; | |
| 162 | 158 | case 'c': | |
| 163 | 158 | type.is_complex = true; | |
| 164 | 158 | break; | |
| 165 | 52 | case 'u': | |
| 166 | 52 | type.is_signed = false; | |
| 167 | 52 | break; | |
| 168 | ✗ | default: | |
| 169 | ✗ | throw std::string("Error: no such type: '") + name[i] + "'"; | |
| 170 | } | ||
| 171 | } | ||
| 172 | |||
| 173 | 546 | return type; | |
| 174 | 420 | } | |
| 175 | |||
// Splits `protokernel_signature` at every '_' character and returns the pieces
// in order. Empty pieces are kept, so the result always has exactly one more
// element than the input has underscores (an empty input yields one empty
// string).
std::vector<std::string> split_signature(const std::string& protokernel_signature)
{
    std::vector<std::string> pieces;
    std::string::size_type piece_begin = 0;
    for (;;) {
        const std::string::size_type underscore =
            protokernel_signature.find('_', piece_begin);
        if (underscore == std::string::npos) {
            // No further separator: the rest of the string is the final piece.
            pieces.push_back(protokernel_signature.substr(piece_begin));
            break;
        }
        pieces.push_back(
            protokernel_signature.substr(piece_begin, underscore - piece_begin));
        piece_begin = underscore + 1;
    }
    return pieces;
}
| 193 | |||
| 194 | 236 | static void get_signatures_from_name(std::vector<volk_type_t>& inputsig, | |
| 195 | std::vector<volk_type_t>& outputsig, | ||
| 196 | std::string name) | ||
| 197 | { | ||
| 198 | |||
| 199 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::vector<std::string> toked = split_signature(name); |
| 200 | |||
| 201 |
2/4✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✓ Branch 5 taken 236 times.
|
236 | assert(toked[0] == "volk"); |
| 202 |
1/2✓ Branch 3 taken 236 times.
✗ Branch 4 not taken.
|
236 | toked.erase(toked.begin()); |
| 203 | |||
| 204 | // ok. we're assuming a string in the form | ||
| 205 | //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment) | ||
| 206 | |||
| 207 | 236 | enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT; | |
| 208 | 236 | std::string fn_name; | |
| 209 | 236 | volk_type_t type; | |
| 210 |
2/2✓ Branch 1 taken 966 times.
✓ Branch 2 taken 236 times.
|
1202 | for (unsigned int token_index = 0; token_index < toked.size(); ++token_index) { |
| 211 |
1/2✓ Branch 2 taken 966 times.
✗ Branch 3 not taken.
|
966 | std::string token = toked[token_index]; |
| 212 | try { | ||
| 213 |
3/4✓ Branch 1 taken 966 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 546 times.
✓ Branch 5 taken 420 times.
|
1386 | type = volk_type_from_string(token); |
| 214 |
2/2✓ Branch 0 taken 234 times.
✓ Branch 1 taken 312 times.
|
546 | if (side == SIDE_NAME) |
| 215 | 234 | side = SIDE_OUTPUT; // if this is the first one after the name... | |
| 216 | |||
| 217 |
2/2✓ Branch 0 taken 312 times.
✓ Branch 1 taken 234 times.
|
546 | if (side == SIDE_INPUT) |
| 218 |
1/2✓ Branch 1 taken 312 times.
✗ Branch 2 not taken.
|
312 | inputsig.push_back(type); |
| 219 | else | ||
| 220 |
1/2✓ Branch 1 taken 234 times.
✗ Branch 2 not taken.
|
234 | outputsig.push_back(type); |
| 221 | 420 | } catch (...) { | |
| 222 |
6/8✓ Branch 1 taken 420 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 80 times.
✓ Branch 4 taken 340 times.
✓ Branch 6 taken 80 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 80 times.
✓ Branch 9 taken 340 times.
|
500 | if (token[0] == 'x' && (token.size() > 1) && |
| 223 |
4/8✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 80 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 80 times.
✗ Branch 7 not taken.
✓ Branch 8 taken 80 times.
✗ Branch 9 not taken.
|
80 | (token[1] > '0' && token[1] < '9')) { // it's a multiplier |
| 224 |
2/2✓ Branch 0 taken 66 times.
✓ Branch 1 taken 14 times.
|
80 | if (side == SIDE_INPUT) |
| 225 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 66 times.
|
66 | assert(inputsig.size() > 0); |
| 226 | else | ||
| 227 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 14 times.
|
14 | assert(outputsig.size() > 0); |
| 228 |
1/2✓ Branch 1 taken 80 times.
✗ Branch 2 not taken.
|
80 | int multiplier = volk_lexical_cast<int>( |
| 229 |
1/2✓ Branch 2 taken 80 times.
✗ Branch 3 not taken.
|
160 | token.substr(1, token.size() - 1)); // will throw if invalid |
| 230 |
2/2✓ Branch 0 taken 84 times.
✓ Branch 1 taken 80 times.
|
164 | for (int i = 1; i < multiplier; i++) { |
| 231 |
2/2✓ Branch 0 taken 70 times.
✓ Branch 1 taken 14 times.
|
84 | if (side == SIDE_INPUT) |
| 232 |
1/2✓ Branch 2 taken 70 times.
✗ Branch 3 not taken.
|
70 | inputsig.push_back(inputsig.back()); |
| 233 | else | ||
| 234 |
1/2✓ Branch 2 taken 14 times.
✗ Branch 3 not taken.
|
14 | outputsig.push_back(outputsig.back()); |
| 235 | } | ||
| 236 |
2/2✓ Branch 0 taken 236 times.
✓ Branch 1 taken 104 times.
|
340 | } else if (side == |
| 237 | SIDE_INPUT) { // it's the function name, at least it better be | ||
| 238 | 236 | side = SIDE_NAME; | |
| 239 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | fn_name.append("_"); |
| 240 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | fn_name.append(token); |
| 241 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 104 times.
|
104 | } else if (side == SIDE_OUTPUT) { |
| 242 | ✗ | if (token != toked.back()) | |
| 243 | ✗ | throw; // the last token in the name is the alignment | |
| 244 | } | ||
| 245 |
1/2✓ Branch 1 taken 420 times.
✗ Branch 2 not taken.
|
420 | } |
| 246 | 966 | } | |
| 247 | // we don't need an output signature (some fn's operate on the input data, "in | ||
| 248 | // place"), but we do need at least one input! | ||
| 249 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 236 times.
|
236 | assert(inputsig.size() != 0); |
| 250 | 236 | } | |
| 251 | |||
| 252 | ✗ | inline void run_cast_test1(volk_fn_1arg func, | |
| 253 | std::vector<void*>& buffs, | ||
| 254 | unsigned int vlen, | ||
| 255 | unsigned int iter, | ||
| 256 | std::string arch) | ||
| 257 | { | ||
| 258 | ✗ | while (iter--) | |
| 259 | ✗ | func(buffs[0], vlen, arch.c_str()); | |
| 260 | ✗ | } | |
| 261 | |||
| 262 | 534 | inline void run_cast_test2(volk_fn_2arg func, | |
| 263 | std::vector<void*>& buffs, | ||
| 264 | unsigned int vlen, | ||
| 265 | unsigned int iter, | ||
| 266 | std::string arch) | ||
| 267 | { | ||
| 268 |
2/2✓ Branch 0 taken 534 times.
✓ Branch 1 taken 534 times.
|
1068 | while (iter--) |
| 269 | 534 | func(buffs[0], buffs[1], vlen, arch.c_str()); | |
| 270 | 534 | } | |
| 271 | |||
| 272 | 426 | inline void run_cast_test3(volk_fn_3arg func, | |
| 273 | std::vector<void*>& buffs, | ||
| 274 | unsigned int vlen, | ||
| 275 | unsigned int iter, | ||
| 276 | std::string arch) | ||
| 277 | { | ||
| 278 |
2/2✓ Branch 0 taken 426 times.
✓ Branch 1 taken 426 times.
|
852 | while (iter--) |
| 279 | 426 | func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str()); | |
| 280 | 426 | } | |
| 281 | |||
| 282 | 22 | inline void run_cast_test4(volk_fn_4arg func, | |
| 283 | std::vector<void*>& buffs, | ||
| 284 | unsigned int vlen, | ||
| 285 | unsigned int iter, | ||
| 286 | std::string arch) | ||
| 287 | { | ||
| 288 |
2/2✓ Branch 0 taken 22 times.
✓ Branch 1 taken 22 times.
|
44 | while (iter--) |
| 289 | 22 | func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, arch.c_str()); | |
| 290 | 22 | } | |
| 291 | |||
| 292 | 10 | inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, | |
| 293 | std::vector<void*>& buffs, | ||
| 294 | float scalar, | ||
| 295 | unsigned int vlen, | ||
| 296 | unsigned int iter, | ||
| 297 | std::string arch) | ||
| 298 | { | ||
| 299 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | while (iter--) |
| 300 | 10 | func(buffs[0], scalar, vlen, arch.c_str()); | |
| 301 | 10 | } | |
| 302 | |||
| 303 | 246 | inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, | |
| 304 | std::vector<void*>& buffs, | ||
| 305 | float scalar, | ||
| 306 | unsigned int vlen, | ||
| 307 | unsigned int iter, | ||
| 308 | std::string arch) | ||
| 309 | { | ||
| 310 |
2/2✓ Branch 0 taken 246 times.
✓ Branch 1 taken 246 times.
|
492 | while (iter--) |
| 311 | 246 | func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); | |
| 312 | 246 | } | |
| 313 | |||
| 314 | 56 | inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, | |
| 315 | std::vector<void*>& buffs, | ||
| 316 | float scalar, | ||
| 317 | unsigned int vlen, | ||
| 318 | unsigned int iter, | ||
| 319 | std::string arch) | ||
| 320 | { | ||
| 321 |
2/2✓ Branch 0 taken 56 times.
✓ Branch 1 taken 56 times.
|
112 | while (iter--) |
| 322 | 56 | func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); | |
| 323 | 56 | } | |
| 324 | |||
| 325 | ✗ | inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, | |
| 326 | std::vector<void*>& buffs, | ||
| 327 | lv_32fc_t scalar, | ||
| 328 | unsigned int vlen, | ||
| 329 | unsigned int iter, | ||
| 330 | std::string arch) | ||
| 331 | { | ||
| 332 | ✗ | while (iter--) | |
| 333 | ✗ | func(buffs[0], scalar, vlen, arch.c_str()); | |
| 334 | ✗ | } | |
| 335 | |||
| 336 | 30 | inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, | |
| 337 | std::vector<void*>& buffs, | ||
| 338 | lv_32fc_t scalar, | ||
| 339 | unsigned int vlen, | ||
| 340 | unsigned int iter, | ||
| 341 | std::string arch) | ||
| 342 | { | ||
| 343 |
2/2✓ Branch 0 taken 30 times.
✓ Branch 1 taken 30 times.
|
60 | while (iter--) |
| 344 | 30 | func(buffs[0], buffs[1], scalar, vlen, arch.c_str()); | |
| 345 | 30 | } | |
| 346 | |||
| 347 | 10 | inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, | |
| 348 | std::vector<void*>& buffs, | ||
| 349 | lv_32fc_t scalar, | ||
| 350 | unsigned int vlen, | ||
| 351 | unsigned int iter, | ||
| 352 | std::string arch) | ||
| 353 | { | ||
| 354 |
2/2✓ Branch 0 taken 10 times.
✓ Branch 1 taken 10 times.
|
20 | while (iter--) |
| 355 | 10 | func(buffs[0], buffs[1], buffs[2], scalar, vlen, arch.c_str()); | |
| 356 | 10 | } | |
| 357 | |||
// Compares two arrays of `vlen` real values element by element.
//
// in1 is treated as the reference and in2 as the candidate.
//  - absolute_mode: an element fails when |in1 - in2| > tol.
//  - otherwise:     an element fails when |in1 - in2| / |in1| > tol, except
//    that for |in1| < 1e-30 the test is |in2| > tol (avoids dividing by a
//    value that is effectively zero).
// The first ten failing elements are printed to std::cout.
// Returns true when at least one element failed, false otherwise.
template <class t>
bool fcompare(t* in1, t* in2, unsigned int vlen, float tol, bool absolute_mode)
{
    bool mismatch_seen = false;
    int reports_left = 10;
    for (unsigned int idx = 0; idx < vlen; ++idx) {
        const t ref = in1[idx];
        const t cand = in2[idx];

        bool out_of_tolerance;
        if (absolute_mode) {
            out_of_tolerance = fabs(ref - cand) > tol;
        } else if (fabs(ref) < 1e-30) {
            // for very small numbers we'll see round off errors due to limited
            // precision, so only the candidate's magnitude is checked
            out_of_tolerance = fabs(cand) > tol;
        } else {
            // the primary test is the relative difference against the reference
            out_of_tolerance = fabs(ref - cand) / fabs(ref) > tol;
        }

        if (!out_of_tolerance) {
            continue;
        }
        mismatch_seen = true;
        if (reports_left-- > 0) {
            std::cout << "offset " << idx << " in1: " << ref << " in2: " << cand;
            std::cout << " tolerance was: " << tol << std::endl;
        }
    }

    return mismatch_seen;
}
| 400 | |||
// Compares two arrays of `vlen` complex values stored as interleaved
// (real, imaginary) pairs of type t, i.e. 2 * vlen scalars per array.
//
// in1 is treated as the reference and in2 as the candidate. For each pair:
//  - any NaN or infinite component in either array is a failure;
//  - err is the Euclidean distance between the two complex values and norm is
//    the magnitude of the in1 value;
//  - absolute_mode: fails when err > tol;
//  - otherwise:     fails when err / norm > tol, except that for norm < 1e-30
//    the test is err > tol.
// Failures are printed to std::cout until ten reports have been emitted (the
// NaN/Inf check and the tolerance check report independently).
// Returns true when at least one failure was found, false otherwise.
template <class t>
bool ccompare(t* in1, t* in2, unsigned int vlen, float tol, bool absolute_mode)
{
    bool fail = false;
    int print_max_errs = 10;

    // Records a failure at scalar index `pos` (the real part of the pair) and
    // prints it while the report budget lasts.
    auto report = [&](unsigned int pos) {
        fail = true;
        if (print_max_errs-- > 0) {
            std::cout << "offset " << pos / 2 << " in1: " << in1[pos] << " + "
                      << in1[pos + 1] << "j in2: " << in2[pos] << " + "
                      << in2[pos + 1] << "j";
            std::cout << " tolerance was: " << tol << std::endl;
        }
    };

    for (unsigned int re = 0; re < 2 * vlen; re += 2) {
        const unsigned int im = re + 1;

        const bool has_nan = std::isnan(in1[re]) || std::isnan(in1[im]) ||
                             std::isnan(in2[re]) || std::isnan(in2[im]);
        const bool has_inf = std::isinf(in1[re]) || std::isinf(in1[im]) ||
                             std::isinf(in2[re]) || std::isinf(in2[im]);
        if (has_nan || has_inf) {
            report(re);
        }

        const t delta_re = in1[re] - in2[re];
        const t delta_im = in1[im] - in2[im];
        const t err = std::sqrt(delta_re * delta_re + delta_im * delta_im);
        const t norm = std::sqrt(in1[re] * in1[re] + in1[im] * in1[im]);

        bool out_of_tolerance;
        if (absolute_mode) {
            out_of_tolerance = err > tol;
        } else if (norm < 1e-30) {
            // for very small numbers we'll see round off errors due to limited
            // precision, so the error is not scaled by the reference magnitude
            out_of_tolerance = err > tol;
        } else {
            // the primary test is the error relative to the reference magnitude
            out_of_tolerance = (err / norm) > tol;
        }
        if (out_of_tolerance) {
            report(re);
        }
    }

    return fail;
}
| 461 | |||
// Compares two arrays of `vlen` integers element by element.
//
// An element fails when the absolute difference between in1[i] and in2[i]
// exceeds `tol`. The difference is computed in 64-bit unsigned arithmetic from
// the larger and the smaller operand, so it is exact for every integer type of
// up to 64 bits. (The earlier version narrowed both operands to `int`, which
// truncated 64-bit values, misread large uint32 values and could overflow for
// int32.)
// `absolute_mode` is accepted for signature parity with the other comparison
// helpers but is not used.
// The first ten failing elements are printed to std::cout.
// Returns true when at least one element failed, false otherwise.
template <class t>
bool icompare(t* in1, t* in2, unsigned int vlen, unsigned int tol, bool absolute_mode)
{
    (void)absolute_mode;
    bool fail = false;
    int print_max_errs = 10;
    for (unsigned int i = 0; i < vlen; i++) {
        const t first = in1[i];
        const t second = in2[i];
        // Subtracting smaller from larger in modular uint64_t arithmetic yields
        // the true distance for signed and unsigned element types alike.
        const uint64_t distance =
            (first > second)
                ? static_cast<uint64_t>(first) - static_cast<uint64_t>(second)
                : static_cast<uint64_t>(second) - static_cast<uint64_t>(first);
        if (distance > tol) {
            fail = true;
            if (print_max_errs-- > 0) {
                // Unary plus promotes 8-bit types so they print as numbers and
                // leaves wider types at full width.
                std::cout << "offset " << i << " in1: " << +first
                          << " in2: " << +second;
                std::cout << " tolerance was: " << tol << std::endl;
            }
        }
    }

    return fail;
}
| 481 | |||
| 482 | class volk_qa_aligned_mem_pool | ||
| 483 | { | ||
| 484 | public: | ||
| 485 | 3514 | void* get_new(size_t size) | |
| 486 | { | ||
| 487 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | size_t alignment = volk_get_alignment(); |
| 488 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | void* ptr = volk_malloc(size, alignment); |
| 489 | 3514 | memset(ptr, 0x00, size); | |
| 490 |
1/2✓ Branch 1 taken 3514 times.
✗ Branch 2 not taken.
|
3514 | _mems.push_back(ptr); |
| 491 | 3514 | return ptr; | |
| 492 | } | ||
| 493 | 236 | ~volk_qa_aligned_mem_pool() | |
| 494 | { | ||
| 495 |
2/2✓ Branch 1 taken 3514 times.
✓ Branch 2 taken 236 times.
|
3750 | for (unsigned int ii = 0; ii < _mems.size(); ++ii) { |
| 496 | 3514 | volk_free(_mems[ii]); | |
| 497 | } | ||
| 498 | 236 | } | |
| 499 | |||
| 500 | private: | ||
| 501 | std::vector<void*> _mems; | ||
| 502 | }; | ||
| 503 | |||
| 504 | 236 | bool run_volk_tests(volk_func_desc_t desc, | |
| 505 | void (*manual_func)(), | ||
| 506 | std::string name, | ||
| 507 | volk_test_params_t test_params, | ||
| 508 | std::vector<volk_test_results_t>* results, | ||
| 509 | std::string puppet_master_name) | ||
| 510 | { | ||
| 511 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 8 taken 236 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 236 times.
✗ Branch 12 not taken.
|
472 | return run_volk_tests(desc, |
| 512 | manual_func, | ||
| 513 | name, | ||
| 514 | test_params.tol(), | ||
| 515 | test_params.scalar(), | ||
| 516 | test_params.vlen(), | ||
| 517 | test_params.iter(), | ||
| 518 | results, | ||
| 519 | puppet_master_name, | ||
| 520 | 236 | test_params.absolute_mode(), | |
| 521 | 708 | test_params.benchmark_mode()); | |
| 522 | } | ||
| 523 | |||
| 524 | 236 | bool run_volk_tests(volk_func_desc_t desc, | |
| 525 | void (*manual_func)(), | ||
| 526 | std::string name, | ||
| 527 | float tol, | ||
| 528 | lv_32fc_t scalar, | ||
| 529 | unsigned int vlen, | ||
| 530 | unsigned int iter, | ||
| 531 | std::vector<volk_test_results_t>* results, | ||
| 532 | std::string puppet_master_name, | ||
| 533 | bool absolute_mode, | ||
| 534 | bool benchmark_mode) | ||
| 535 | { | ||
| 536 | // Initialize this entry in results vector | ||
| 537 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->push_back(volk_test_results_t()); |
| 538 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().name = name; |
| 539 | 236 | results->back().vlen = vlen; | |
| 540 | 236 | results->back().iter = iter; | |
| 541 |
7/14✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
✓ Branch 10 taken 236 times.
✗ Branch 11 not taken.
✓ Branch 13 taken 236 times.
✗ Branch 14 not taken.
✓ Branch 16 taken 236 times.
✗ Branch 17 not taken.
✓ Branch 19 taken 236 times.
✗ Branch 20 not taken.
|
236 | std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << ")" |
| 542 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | << std::endl; |
| 543 | |||
| 544 | // vlen_twiddle will increase vlen for malloc and data generation | ||
| 545 | // but kernels will still be called with the user provided vlen. | ||
| 546 | // This is useful for causing errors in kernels that do bad reads | ||
| 547 | 236 | const unsigned int vlen_twiddle = 5; | |
| 548 | 236 | vlen = vlen + vlen_twiddle; | |
| 549 | |||
| 550 | 236 | const float tol_f = tol; | |
| 551 | 236 | const unsigned int tol_i = static_cast<const unsigned int>(tol); | |
| 552 | |||
| 553 | // first let's get a list of available architectures for the test | ||
| 554 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::vector<std::string> arch_list = get_arch_list(desc); |
| 555 | |||
| 556 |
2/6✗ Branch 0 not taken.
✓ Branch 1 taken 236 times.
✗ Branch 3 not taken.
✗ Branch 4 not taken.
✗ Branch 5 not taken.
✓ Branch 6 taken 236 times.
|
236 | if ((!benchmark_mode) && (arch_list.size() < 2)) { |
| 557 | ✗ | std::cout << "no architectures to test" << std::endl; | |
| 558 | ✗ | return false; | |
| 559 | } | ||
| 560 | |||
| 561 | // something that can hang onto memory and cleanup when this function exits | ||
| 562 | 236 | volk_qa_aligned_mem_pool mem_pool; | |
| 563 | |||
| 564 | // now we have to get a function signature by parsing the name | ||
| 565 | 236 | std::vector<volk_type_t> inputsig, outputsig; | |
| 566 | try { | ||
| 567 |
2/4✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
|
236 | get_signatures_from_name(inputsig, outputsig, name); |
| 568 | ✗ | } catch (std::exception& error) { | |
| 569 | ✗ | std::cerr << "Error: unable to get function signature from kernel name" | |
| 570 | ✗ | << std::endl; | |
| 571 | ✗ | std::cerr << " - " << name << std::endl; | |
| 572 | ✗ | return false; | |
| 573 | ✗ | } | |
| 574 | |||
| 575 | // pull the input scalars into their own vector | ||
| 576 | 236 | std::vector<volk_type_t> inputsc; | |
| 577 |
2/2✓ Branch 1 taken 382 times.
✓ Branch 2 taken 236 times.
|
618 | for (size_t i = 0; i < inputsig.size(); i++) { |
| 578 |
2/2✓ Branch 1 taken 62 times.
✓ Branch 2 taken 320 times.
|
382 | if (inputsig[i].is_scalar) { |
| 579 |
1/2✓ Branch 2 taken 62 times.
✗ Branch 3 not taken.
|
62 | inputsc.push_back(inputsig[i]); |
| 580 |
1/2✓ Branch 4 taken 62 times.
✗ Branch 5 not taken.
|
62 | inputsig.erase(inputsig.begin() + i); |
| 581 | 62 | i -= 1; | |
| 582 | } | ||
| 583 | } | ||
| 584 | 236 | std::vector<void*> inbuffs; | |
| 585 |
2/2✓ Branch 1 taken 320 times.
✓ Branch 2 taken 236 times.
|
556 | for (unsigned int inputsig_index = 0; inputsig_index < inputsig.size(); |
| 586 | ++inputsig_index) { | ||
| 587 |
1/2✓ Branch 2 taken 320 times.
✗ Branch 3 not taken.
|
320 | volk_type_t sig = inputsig[inputsig_index]; |
| 588 |
1/2✓ Branch 0 taken 320 times.
✗ Branch 1 not taken.
|
320 | if (!sig.is_scalar) // we don't make buffers for scalars |
| 589 | 320 | inbuffs.push_back( | |
| 590 |
4/6✓ Branch 0 taken 130 times.
✓ Branch 1 taken 190 times.
✓ Branch 3 taken 320 times.
✗ Branch 4 not taken.
✓ Branch 6 taken 320 times.
✗ Branch 7 not taken.
|
320 | mem_pool.get_new(vlen * sig.size * (sig.is_complex ? 2 : 1))); |
| 591 | 320 | } | |
| 592 |
2/2✓ Branch 1 taken 320 times.
✓ Branch 2 taken 236 times.
|
556 | for (size_t i = 0; i < inbuffs.size(); i++) { |
| 593 |
2/4✓ Branch 2 taken 320 times.
✗ Branch 3 not taken.
✓ Branch 6 taken 320 times.
✗ Branch 7 not taken.
|
320 | load_random_data(inbuffs[i], inputsig[i], vlen); |
| 594 | } | ||
| 595 | |||
| 596 | // ok let's make a vector of vector of void buffers, which holds the input/output | ||
| 597 | // vectors for each arch | ||
| 598 | 236 | std::vector<std::vector<void*>> test_data; | |
| 599 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
| 600 | 1334 | std::vector<void*> arch_buffs; | |
| 601 |
2/2✓ Branch 1 taken 1396 times.
✓ Branch 2 taken 1334 times.
|
2730 | for (size_t j = 0; j < outputsig.size(); j++) { |
| 602 |
2/4✓ Branch 2 taken 1396 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1396 times.
✗ Branch 6 not taken.
|
2792 | arch_buffs.push_back(mem_pool.get_new(vlen * outputsig[j].size * |
| 603 |
2/2✓ Branch 1 taken 272 times.
✓ Branch 2 taken 1124 times.
|
1396 | (outputsig[j].is_complex ? 2 : 1))); |
| 604 | } | ||
| 605 |
2/2✓ Branch 1 taken 1798 times.
✓ Branch 2 taken 1334 times.
|
3132 | for (size_t j = 0; j < inputsig.size(); j++) { |
| 606 |
1/2✓ Branch 2 taken 1798 times.
✗ Branch 3 not taken.
|
3596 | void* arch_inbuff = mem_pool.get_new(vlen * inputsig[j].size * |
| 607 |
2/2✓ Branch 1 taken 718 times.
✓ Branch 2 taken 1080 times.
|
1798 | (inputsig[j].is_complex ? 2 : 1)); |
| 608 | 3596 | memcpy(arch_inbuff, | |
| 609 | 1798 | inbuffs[j], | |
| 610 |
2/2✓ Branch 2 taken 718 times.
✓ Branch 3 taken 1080 times.
|
1798 | vlen * inputsig[j].size * (inputsig[j].is_complex ? 2 : 1)); |
| 611 |
1/2✓ Branch 1 taken 1798 times.
✗ Branch 2 not taken.
|
1798 | arch_buffs.push_back(arch_inbuff); |
| 612 | } | ||
| 613 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | test_data.push_back(arch_buffs); |
| 614 | 1334 | } | |
| 615 | |||
| 616 | 236 | std::vector<volk_type_t> both_sigs; | |
| 617 |
1/2✓ Branch 5 taken 236 times.
✗ Branch 6 not taken.
|
236 | both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end()); |
| 618 |
1/2✓ Branch 5 taken 236 times.
✗ Branch 6 not taken.
|
236 | both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end()); |
| 619 | |||
| 620 | // now run the test | ||
| 621 | 236 | vlen = vlen - vlen_twiddle; | |
| 622 | 236 | std::chrono::time_point<std::chrono::system_clock> start, end; | |
| 623 | 236 | std::vector<double> profile_times; | |
| 624 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
| 625 | 1334 | start = std::chrono::system_clock::now(); | |
| 626 | |||
| 627 |
4/5✓ Branch 1 taken 10 times.
✓ Branch 2 taken 810 times.
✓ Branch 3 taken 492 times.
✓ Branch 4 taken 22 times.
✗ Branch 5 not taken.
|
1334 | switch (both_sigs.size()) { |
| 628 | 10 | case 1: | |
| 629 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
|
10 | if (inputsc.size() == 0) { |
| 630 | ✗ | run_cast_test1( | |
| 631 | ✗ | (volk_fn_1arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
| 632 |
3/6✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 10 times.
✗ Branch 7 not taken.
|
10 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
| 633 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 10 times.
|
10 | if (inputsc[0].is_complex) { |
| 634 | ✗ | run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), | |
| 635 | ✗ | test_data[i], | |
| 636 | scalar, | ||
| 637 | vlen, | ||
| 638 | iter, | ||
| 639 | ✗ | arch_list[i]); | |
| 640 | } else { | ||
| 641 |
2/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 10 times.
✗ Branch 6 not taken.
|
20 | run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), |
| 642 | 10 | test_data[i], | |
| 643 | scalar.real(), | ||
| 644 | vlen, | ||
| 645 | iter, | ||
| 646 | 10 | arch_list[i]); | |
| 647 | } | ||
| 648 | } else | ||
| 649 | ✗ | throw "unsupported 1 arg function >1 scalars"; | |
| 650 | 10 | break; | |
| 651 | 810 | case 2: | |
| 652 |
2/2✓ Branch 1 taken 534 times.
✓ Branch 2 taken 276 times.
|
810 | if (inputsc.size() == 0) { |
| 653 |
2/4✓ Branch 1 taken 534 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 534 times.
✗ Branch 5 not taken.
|
1068 | run_cast_test2( |
| 654 | 1068 | (volk_fn_2arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
| 655 |
3/6✓ Branch 1 taken 276 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 276 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 276 times.
✗ Branch 7 not taken.
|
276 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
| 656 |
2/2✓ Branch 1 taken 30 times.
✓ Branch 2 taken 246 times.
|
276 | if (inputsc[0].is_complex) { |
| 657 |
2/4✓ Branch 1 taken 30 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 30 times.
✗ Branch 5 not taken.
|
60 | run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), |
| 658 | 30 | test_data[i], | |
| 659 | scalar, | ||
| 660 | vlen, | ||
| 661 | iter, | ||
| 662 | 30 | arch_list[i]); | |
| 663 | } else { | ||
| 664 |
2/4✓ Branch 1 taken 246 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 246 times.
✗ Branch 6 not taken.
|
492 | run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), |
| 665 | 246 | test_data[i], | |
| 666 | scalar.real(), | ||
| 667 | vlen, | ||
| 668 | iter, | ||
| 669 | 246 | arch_list[i]); | |
| 670 | } | ||
| 671 | } else | ||
| 672 | ✗ | throw "unsupported 2 arg function >1 scalars"; | |
| 673 | 810 | break; | |
| 674 | 492 | case 3: | |
| 675 |
2/2✓ Branch 1 taken 426 times.
✓ Branch 2 taken 66 times.
|
492 | if (inputsc.size() == 0) { |
| 676 |
2/4✓ Branch 1 taken 426 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 426 times.
✗ Branch 5 not taken.
|
852 | run_cast_test3( |
| 677 | 852 | (volk_fn_3arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
| 678 |
3/6✓ Branch 1 taken 66 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 66 times.
✗ Branch 5 not taken.
✓ Branch 6 taken 66 times.
✗ Branch 7 not taken.
|
66 | } else if (inputsc.size() == 1 && inputsc[0].is_float) { |
| 679 |
2/2✓ Branch 1 taken 10 times.
✓ Branch 2 taken 56 times.
|
66 | if (inputsc[0].is_complex) { |
| 680 |
2/4✓ Branch 1 taken 10 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 10 times.
✗ Branch 5 not taken.
|
20 | run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), |
| 681 | 10 | test_data[i], | |
| 682 | scalar, | ||
| 683 | vlen, | ||
| 684 | iter, | ||
| 685 | 10 | arch_list[i]); | |
| 686 | } else { | ||
| 687 |
2/4✓ Branch 1 taken 56 times.
✗ Branch 2 not taken.
✓ Branch 5 taken 56 times.
✗ Branch 6 not taken.
|
112 | run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), |
| 688 | 56 | test_data[i], | |
| 689 | scalar.real(), | ||
| 690 | vlen, | ||
| 691 | iter, | ||
| 692 | 56 | arch_list[i]); | |
| 693 | } | ||
| 694 | } else | ||
| 695 | ✗ | throw "unsupported 3 arg function >1 scalars"; | |
| 696 | 492 | break; | |
| 697 | 22 | case 4: | |
| 698 |
2/4✓ Branch 1 taken 22 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 22 times.
✗ Branch 5 not taken.
|
44 | run_cast_test4( |
| 699 | 44 | (volk_fn_4arg)(manual_func), test_data[i], vlen, iter, arch_list[i]); | |
| 700 | 22 | break; | |
| 701 | ✗ | default: | |
| 702 | ✗ | throw "no function handler for this signature"; | |
| 703 | break; | ||
| 704 | } | ||
| 705 | |||
| 706 | 1334 | end = std::chrono::system_clock::now(); | |
| 707 |
2/4✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 1334 times.
✗ Branch 5 not taken.
|
1334 | std::chrono::duration<double> elapsed_seconds = end - start; |
| 708 | 1334 | double arch_time = 1000.0 * elapsed_seconds.count(); | |
| 709 |
5/10✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1334 times.
✗ Branch 6 not taken.
✓ Branch 8 taken 1334 times.
✗ Branch 9 not taken.
✓ Branch 11 taken 1334 times.
✗ Branch 12 not taken.
✓ Branch 14 taken 1334 times.
✗ Branch 15 not taken.
|
1334 | std::cout << arch_list[i] << " completed in " << arch_time << " ms" << std::endl; |
| 710 | 2668 | volk_test_time_t result; | |
| 711 |
1/2✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
|
1334 | result.name = arch_list[i]; |
| 712 | 1334 | result.time = arch_time; | |
| 713 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | result.units = "ms"; |
| 714 | 1334 | result.pass = true; | |
| 715 |
2/4✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 5 taken 1334 times.
✗ Branch 6 not taken.
|
1334 | results->back().results[result.name] = result; |
| 716 | |||
| 717 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | profile_times.push_back(arch_time); |
| 718 | } | ||
| 719 | |||
| 720 | // and now compare each output to the generic output | ||
| 721 | // first we have to know which output is the generic one, they aren't in order... | ||
| 722 | 236 | size_t generic_offset = 0; | |
| 723 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
| 724 |
3/4✓ Branch 2 taken 1334 times.
✗ Branch 3 not taken.
✓ Branch 4 taken 232 times.
✓ Branch 5 taken 1102 times.
|
1334 | if (arch_list[i] == "generic") { |
| 725 | 232 | generic_offset = i; | |
| 726 | } | ||
| 727 | } | ||
| 728 | |||
| 729 | // Just in case a kernel wrote to OOB memory, use the twiddled vlen | ||
| 730 | 236 | vlen = vlen + vlen_twiddle; | |
| 731 | bool fail; | ||
| 732 | 236 | bool fail_global = false; | |
| 733 | 472 | std::vector<bool> arch_results; | |
| 734 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
| 735 | 1334 | fail = false; | |
| 736 |
2/2✓ Branch 0 taken 1098 times.
✓ Branch 1 taken 236 times.
|
1334 | if (i != generic_offset) { |
| 737 |
2/2✓ Branch 1 taken 2626 times.
✓ Branch 2 taken 1098 times.
|
3724 | for (size_t j = 0; j < both_sigs.size(); j++) { |
| 738 |
2/2✓ Branch 1 taken 1888 times.
✓ Branch 2 taken 738 times.
|
2626 | if (both_sigs[j].is_float) { |
| 739 |
2/2✓ Branch 1 taken 146 times.
✓ Branch 2 taken 1742 times.
|
1888 | if (both_sigs[j].size == 8) { |
| 740 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 146 times.
|
146 | if (both_sigs[j].is_complex) { |
| 741 | ✗ | fail = ccompare((double*)test_data[generic_offset][j], | |
| 742 | ✗ | (double*)test_data[i][j], | |
| 743 | vlen, | ||
| 744 | tol_f, | ||
| 745 | absolute_mode); | ||
| 746 | } else { | ||
| 747 |
1/2✓ Branch 3 taken 146 times.
✗ Branch 4 not taken.
|
146 | fail = fcompare((double*)test_data[generic_offset][j], |
| 748 | 146 | (double*)test_data[i][j], | |
| 749 | vlen, | ||
| 750 | tol_f, | ||
| 751 | absolute_mode); | ||
| 752 | } | ||
| 753 | } else { | ||
| 754 |
2/2✓ Branch 1 taken 608 times.
✓ Branch 2 taken 1134 times.
|
1742 | if (both_sigs[j].is_complex) { |
| 755 |
1/2✓ Branch 3 taken 608 times.
✗ Branch 4 not taken.
|
608 | fail = ccompare((float*)test_data[generic_offset][j], |
| 756 | 608 | (float*)test_data[i][j], | |
| 757 | vlen, | ||
| 758 | tol_f, | ||
| 759 | absolute_mode); | ||
| 760 | } else { | ||
| 761 |
1/2✓ Branch 3 taken 1134 times.
✗ Branch 4 not taken.
|
1134 | fail = fcompare((float*)test_data[generic_offset][j], |
| 762 | 1134 | (float*)test_data[i][j], | |
| 763 | vlen, | ||
| 764 | tol_f, | ||
| 765 | absolute_mode); | ||
| 766 | } | ||
| 767 | } | ||
| 768 | } else { | ||
| 769 | // i could replace this whole switch statement with a memcmp if i | ||
| 770 | // wasn't interested in printing the outputs where they differ | ||
| 771 |
4/5✓ Branch 1 taken 28 times.
✓ Branch 2 taken 170 times.
✓ Branch 3 taken 348 times.
✓ Branch 4 taken 192 times.
✗ Branch 5 not taken.
|
738 | switch (both_sigs[j].size) { |
| 772 | 28 | case 8: | |
| 773 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | if (both_sigs[j].is_signed) { |
| 774 | ✗ | fail = icompare((int64_t*)test_data[generic_offset][j], | |
| 775 | ✗ | (int64_t*)test_data[i][j], | |
| 776 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
| 777 | tol_i, | ||
| 778 | absolute_mode); | ||
| 779 | } else { | ||
| 780 |
1/2✓ Branch 3 taken 28 times.
✗ Branch 4 not taken.
|
28 | fail = icompare((uint64_t*)test_data[generic_offset][j], |
| 781 | 28 | (uint64_t*)test_data[i][j], | |
| 782 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 28 times.
|
28 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 783 | tol_i, | ||
| 784 | absolute_mode); | ||
| 785 | } | ||
| 786 | 28 | break; | |
| 787 | 170 | case 4: | |
| 788 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 170 times.
|
170 | if (both_sigs[j].is_complex) { |
| 789 | ✗ | if (both_sigs[j].is_signed) { | |
| 790 | ✗ | fail = icompare((int16_t*)test_data[generic_offset][j], | |
| 791 | ✗ | (int16_t*)test_data[i][j], | |
| 792 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
| 793 | tol_i, | ||
| 794 | absolute_mode); | ||
| 795 | } else { | ||
| 796 | ✗ | fail = icompare((uint16_t*)test_data[generic_offset][j], | |
| 797 | ✗ | (uint16_t*)test_data[i][j], | |
| 798 | ✗ | vlen * (both_sigs[j].is_complex ? 2 : 1), | |
| 799 | tol_i, | ||
| 800 | absolute_mode); | ||
| 801 | } | ||
| 802 | } else { | ||
| 803 |
2/2✓ Branch 1 taken 82 times.
✓ Branch 2 taken 88 times.
|
170 | if (both_sigs[j].is_signed) { |
| 804 |
1/2✓ Branch 3 taken 82 times.
✗ Branch 4 not taken.
|
82 | fail = icompare((int32_t*)test_data[generic_offset][j], |
| 805 | 82 | (int32_t*)test_data[i][j], | |
| 806 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 82 times.
|
82 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 807 | tol_i, | ||
| 808 | absolute_mode); | ||
| 809 | } else { | ||
| 810 |
1/2✓ Branch 3 taken 88 times.
✗ Branch 4 not taken.
|
88 | fail = icompare((uint32_t*)test_data[generic_offset][j], |
| 811 | 88 | (uint32_t*)test_data[i][j], | |
| 812 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 88 times.
|
88 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 813 | tol_i, | ||
| 814 | absolute_mode); | ||
| 815 | } | ||
| 816 | } | ||
| 817 | 170 | break; | |
| 818 | 348 | case 2: | |
| 819 |
2/2✓ Branch 1 taken 296 times.
✓ Branch 2 taken 52 times.
|
348 | if (both_sigs[j].is_signed) { |
| 820 |
1/2✓ Branch 3 taken 296 times.
✗ Branch 4 not taken.
|
296 | fail = icompare((int16_t*)test_data[generic_offset][j], |
| 821 | 296 | (int16_t*)test_data[i][j], | |
| 822 |
2/2✓ Branch 1 taken 140 times.
✓ Branch 2 taken 156 times.
|
296 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 823 | tol_i, | ||
| 824 | absolute_mode); | ||
| 825 | } else { | ||
| 826 |
1/2✓ Branch 3 taken 52 times.
✗ Branch 4 not taken.
|
52 | fail = icompare((uint16_t*)test_data[generic_offset][j], |
| 827 | 52 | (uint16_t*)test_data[i][j], | |
| 828 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 52 times.
|
52 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 829 | tol_i, | ||
| 830 | absolute_mode); | ||
| 831 | } | ||
| 832 | 348 | break; | |
| 833 | 192 | case 1: | |
| 834 |
2/2✓ Branch 1 taken 136 times.
✓ Branch 2 taken 56 times.
|
192 | if (both_sigs[j].is_signed) { |
| 835 |
1/2✓ Branch 3 taken 136 times.
✗ Branch 4 not taken.
|
136 | fail = icompare((int8_t*)test_data[generic_offset][j], |
| 836 | 136 | (int8_t*)test_data[i][j], | |
| 837 |
2/2✓ Branch 1 taken 64 times.
✓ Branch 2 taken 72 times.
|
136 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 838 | tol_i, | ||
| 839 | absolute_mode); | ||
| 840 | } else { | ||
| 841 |
1/2✓ Branch 3 taken 56 times.
✗ Branch 4 not taken.
|
56 | fail = icompare((uint8_t*)test_data[generic_offset][j], |
| 842 | 56 | (uint8_t*)test_data[i][j], | |
| 843 |
1/2✗ Branch 1 not taken.
✓ Branch 2 taken 56 times.
|
56 | vlen * (both_sigs[j].is_complex ? 2 : 1), |
| 844 | tol_i, | ||
| 845 | absolute_mode); | ||
| 846 | } | ||
| 847 | 192 | break; | |
| 848 | ✗ | default: | |
| 849 | ✗ | fail = 1; | |
| 850 | } | ||
| 851 | } | ||
| 852 |
1/2✗ Branch 0 not taken.
✓ Branch 1 taken 2626 times.
|
2626 | if (fail) { |
| 853 | ✗ | volk_test_time_t* result = &results->back().results[arch_list[i]]; | |
| 854 | ✗ | result->pass = false; | |
| 855 | ✗ | fail_global = true; | |
| 856 | ✗ | std::cout << name << ": fail on arch " << arch_list[i] << std::endl; | |
| 857 | } | ||
| 858 | } | ||
| 859 | } | ||
| 860 |
1/2✓ Branch 1 taken 1334 times.
✗ Branch 2 not taken.
|
1334 | arch_results.push_back(!fail); |
| 861 | } | ||
| 862 | |||
| 863 | 236 | double best_time_a = std::numeric_limits<double>::max(); | |
| 864 | 236 | double best_time_u = std::numeric_limits<double>::max(); | |
| 865 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
708 | std::string best_arch_a = "generic"; |
| 866 |
1/2✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
|
236 | std::string best_arch_u = "generic"; |
| 867 |
2/2✓ Branch 1 taken 1334 times.
✓ Branch 2 taken 236 times.
|
1570 | for (size_t i = 0; i < arch_list.size(); i++) { |
| 868 |
6/8✓ Branch 1 taken 875 times.
✓ Branch 2 taken 459 times.
✓ Branch 4 taken 875 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 875 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 465 times.
✓ Branch 10 taken 869 times.
|
2209 | if ((profile_times[i] < best_time_u) && arch_results[i] && |
| 869 |
2/2✓ Branch 0 taken 465 times.
✓ Branch 1 taken 410 times.
|
875 | desc.impl_alignment[i] == 0) { |
| 870 | 465 | best_time_u = profile_times[i]; | |
| 871 |
1/2✓ Branch 2 taken 465 times.
✗ Branch 3 not taken.
|
465 | best_arch_u = arch_list[i]; |
| 872 | } | ||
| 873 |
6/8✓ Branch 1 taken 557 times.
✓ Branch 2 taken 777 times.
✓ Branch 4 taken 557 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 557 times.
✗ Branch 8 not taken.
✓ Branch 9 taken 557 times.
✓ Branch 10 taken 777 times.
|
1334 | if ((profile_times[i] < best_time_a) && arch_results[i]) { |
| 874 | 557 | best_time_a = profile_times[i]; | |
| 875 |
1/2✓ Branch 2 taken 557 times.
✗ Branch 3 not taken.
|
557 | best_arch_a = arch_list[i]; |
| 876 | } | ||
| 877 | } | ||
| 878 | |||
| 879 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
|
236 | std::cout << "Best aligned arch: " << best_arch_a << std::endl; |
| 880 |
3/6✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 4 taken 236 times.
✗ Branch 5 not taken.
✓ Branch 7 taken 236 times.
✗ Branch 8 not taken.
|
236 | std::cout << "Best unaligned arch: " << best_arch_u << std::endl; |
| 881 | |||
| 882 |
3/4✓ Branch 1 taken 236 times.
✗ Branch 2 not taken.
✓ Branch 3 taken 210 times.
✓ Branch 4 taken 26 times.
|
236 | if (puppet_master_name == "NULL") { |
| 883 |
1/2✓ Branch 2 taken 210 times.
✗ Branch 3 not taken.
|
210 | results->back().config_name = name; |
| 884 | } else { | ||
| 885 |
1/2✓ Branch 2 taken 26 times.
✗ Branch 3 not taken.
|
26 | results->back().config_name = puppet_master_name; |
| 886 | } | ||
| 887 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().best_arch_a = best_arch_a; |
| 888 |
1/2✓ Branch 2 taken 236 times.
✗ Branch 3 not taken.
|
236 | results->back().best_arch_u = best_arch_u; |
| 889 | |||
| 890 | 236 | return fail_global; | |
| 891 | 236 | } | |
| 892 |