vulkan: Use fewer rows for scalar FA when HS is not a multiple of 16 (#17455)

This commit is contained in:
Jeff Bolz 2025-11-25 00:11:27 -06:00 committed by GitHub
parent 877566d512
commit d414db02d3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 10 additions and 5 deletions

View file

@ -7859,6 +7859,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
}
}
// Qwen3-VL-8B https://github.com/ggml-org/llama.cpp/issues/17012
test_cases.emplace_back(new test_flash_attn_ext(72, 72, 16, {1, 1}, 5776, 5776, false, false, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
for (int kv : { 4096, 8192, 16384, }) {
for (int hs : { 64, 128, }) {
for (int nr : { 1, 4, }) {