vulkan: improve topk perf for large k, fix overflow in unit tests (#17582)
This commit is contained in:
parent
d82b7a7c1d
commit
59d8d4e963
2 changed files with 8 additions and 4 deletions
|
|
@@ -1446,14 +1446,14 @@ struct test_case {
|
|||
const uint64_t target_flops_cpu = 8ULL * GFLOP;
|
||||
const uint64_t target_flops_gpu = 100ULL * GFLOP;
|
||||
uint64_t target_flops = is_cpu ? target_flops_cpu : target_flops_gpu;
|
||||
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
||||
n_runs = (int)std::min<int64_t>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_flops / op_flops(out)) + 1;
|
||||
} else {
|
||||
// based on memory size
|
||||
const size_t GB = 1ULL << 30;
|
||||
const size_t target_size_cpu = 8 * GB;
|
||||
const size_t target_size_gpu = 32 * GB;
|
||||
size_t target_size = is_cpu ? target_size_cpu : target_size_gpu;
|
||||
n_runs = std::min<int>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
||||
n_runs = (int)std::min<int64_t>(ggml_graph_size(gf) - ggml_graph_n_nodes(gf), target_size / op_size(out)) + 1;
|
||||
}
|
||||
|
||||
// duplicate the op
|
||||
|
|
@@ -8043,7 +8043,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
}
|
||||
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
||||
for (auto k : {1, 10, 40}) {
|
||||
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2, 1, 1, 1}, 1));
|
||||
for (auto k : {1, 10, 40, 400}) {
|
||||
for (auto nrows : {1, 16}) {
|
||||
for (auto cols : {k, 1000, 65000, 200000}) {
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {cols, nrows, 1, 1}, k));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue