llama-fit-params: free memory target per device (#18679)

This commit is contained in:
Johannes Gäßler 2026-01-08 10:07:58 +01:00 committed by GitHub
parent 9a5724dee2
commit 64848deb18
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 83 additions and 39 deletions

View file

@@ -495,7 +495,7 @@ extern "C" {
struct llama_context_params * cparams,
float * tensor_split, // writable buffer for tensor split, needs at least llama_max_devices elements
struct llama_model_tensor_buft_override * tensor_buft_overrides, // writable buffer for overrides, needs at least llama_max_tensor_buft_overrides elements
size_t margin, // margin of memory to leave per device in bytes
size_t * margins, // margins of memory to leave per device in bytes
uint32_t n_ctx_min, // minimum context size to set when trying to reduce memory use
enum ggml_log_level log_level); // minimum log level to print during fitting, lower levels go to debug log