cuda : fix supports_op condition for get_rows when number of blocks is too large (#15868)

* cuda : fix supports_op condition for get_rows when src1->ne2 > 1 ggml-ci
* ggml : add comment about ggml_get_rows ggml-ci
* cuda : add FIXME [no ci]
* cuda : update support condition ggml-ci
2025-09-08 13:56:51 +03:00 · 2025-09-08 13:56:51 +03:00 · b0d52998b9
commit b0d52998b9
parent f28d4f4ac9
3 changed files with 10 additions and 1 deletion
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1529,7 +1529,11 @@ extern "C" {
            struct ggml_context * ctx,
            struct ggml_tensor  * a);

-    // supports 3D: a->ne[2] == b->ne[1]
+    // supports 4D a:
+    // a     [n_embd, ne1, ne2, ne3]
+    // b I32 [n_rows, ne2, ne3, 1]
+    //
+    // return [n_embd, n_rows, ne2, ne3]
    GGML_API struct ggml_tensor * ggml_get_rows(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,  // data