CUDA: fix FA kernel selection logic (#21271)

This commit is contained in:
Johannes Gäßler 2026-04-01 21:28:19 +02:00 committed by GitHub
parent 6de97b9d3e
commit 86221cf6da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -340,7 +340,14 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
case 128:
case 112:
case 256:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
break;
case 512:
if (V->ne[0] != K->ne[0]) {
return BEST_FATTN_KERNEL_NONE;
}
if (!gqa_opt_applies) {
return BEST_FATTN_KERNEL_NONE;
}