llama-fit-params: free memory target per device (#18679)

This commit is contained in:
Johannes Gäßler 2026-01-08 10:07:58 +01:00 committed by GitHub
parent 9a5724dee2
commit 64848deb18
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 83 additions and 39 deletions

View file

@@ -495,7 +495,7 @@ extern "C" {
struct llama_context_params * cparams,
float * tensor_split, // writable buffer for tensor split, needs at least llama_max_devices elements
struct llama_model_tensor_buft_override * tensor_buft_overrides, // writable buffer for overrides, needs at least llama_max_tensor_buft_overrides elements
size_t margin, // margin of memory to leave per device in bytes
size_t * margins, // margins of memory to leave per device in bytes
uint32_t n_ctx_min, // minimum context size to set when trying to reduce memory use
enum ggml_log_level log_level); // minimum log level to print during fitting, lower levels go to debug log