llama-fit-params: free memory target per device (#18679)
parent 9a5724dee2
commit 64848deb18
6 changed files with 83 additions and 39 deletions
@@ -495,7 +495,7 @@ extern "C" {
             struct llama_context_params * cparams,
             float * tensor_split, // writable buffer for tensor split, needs at least llama_max_devices elements
             struct llama_model_tensor_buft_override * tensor_buft_overrides, // writable buffer for overrides, needs at least llama_max_tensor_buft_overrides elements
-            size_t margin, // margin of memory to leave per device in bytes
+            size_t * margins, // margins of memory to leave per device in bytes
             uint32_t n_ctx_min, // minimum context size to set when trying to reduce memory use
             enum ggml_log_level log_level); // minimum log level to print during fitting, lower levels go to debug log

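The hunk replaces the single scalar margin argument with a margins buffer holding one value per device, matching the per-device free-memory target in the commit title. A minimal caller-side sketch of filling such a buffer follows; the fitting function itself and its leading parameters are not shown in this hunk, so only the margin buffer handling is illustrated, and llama_max_devices() (the existing llama.cpp helper referenced in the tensor_split comment) is assumed to give the number of device slots.

// Sketch, not from the commit: allocate and fill one free-memory margin per
// device, in bytes, before passing the buffer to the fitting API whose tail
// is shown in the hunk above.
#include <stdlib.h>
#include "llama.h"

static size_t * alloc_margins(size_t bytes_free_per_device) {
    const size_t n_dev = llama_max_devices(); // existing llama.cpp helper: number of device slots
    size_t * margins = (size_t *) calloc(n_dev, sizeof(size_t));
    if (margins == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < n_dev; i++) {
        margins[i] = bytes_free_per_device; // e.g. leave 1 GiB free on every device
    }
    return margins;
}

A caller that wants different headroom on a particular device (for example, the GPU driving a display) can overwrite individual entries before passing the buffer in; that per-device control is what the move from a scalar margin to a margins array enables.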