vulkan: Use fewer rows for scalar FA when HS is not a multiple of 16 (#17455)
This commit is contained in:
parent
877566d512
commit
d414db02d3
2 changed files with 10 additions and 5 deletions
|
|
@@ -7859,6 +7859,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
|||
}
|
||||
}
|
||||
|
||||
// Qwen3-VL-8B https://github.com/ggml-org/llama.cpp/issues/17012
|
||||
test_cases.emplace_back(new test_flash_attn_ext(72, 72, 16, {1, 1}, 5776, 5776, false, false, 0, 0, GGML_PREC_F32, GGML_TYPE_F16));
|
||||
|
||||
for (int kv : { 4096, 8192, 16384, }) {
|
||||
for (int hs : { 64, 128, }) {
|
||||
for (int nr : { 1, 4, }) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue