llama : update LoRA API. + fix excessive graph reserves (#19280)

* Refactoring to use new llama_put_adapter_loras

* cont : alternative lora API

---------

Co-authored-by: Jake Chavis <jakechavis6@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
agent-enemy-2 2026-02-14 03:06:27 -05:00 committed by GitHub
parent eb145c0753
commit 2d8015e8a4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 62 additions and 88 deletions

View file

@@ -656,21 +656,12 @@ extern "C" {
// The following functions operate on a llama_context, hence the naming: llama_verb_...
// Add a loaded LoRA adapter to given context
// This will not modify model's weight
LLAMA_API int32_t llama_set_adapter_lora(
// Set LoRA adapters on the context. Only modifies the context if the given adapters differ from those currently set.
LLAMA_API int32_t llama_set_adapters_lora(
struct llama_context * ctx,
struct llama_adapter_lora * adapter,
float scale);
// Remove a specific LoRA adapter from given context
// Return -1 if the adapter is not present in the context
LLAMA_API int32_t llama_rm_adapter_lora(
struct llama_context * ctx,
struct llama_adapter_lora * adapter);
// Remove all LoRA adapters from given context
LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
struct llama_adapter_lora ** adapters,
size_t n_adapters,
float * scales);
// Apply a loaded control vector to a llama_context, or if data is NULL, clear
// the currently loaded vector.
@@ -678,7 +669,7 @@ extern "C" {
// to an n_embd x n_layers buffer starting from layer 1.
// il_start and il_end are the layer range the vector should apply to (both inclusive)
// See llama_control_vector_load in common to load a control vector.
LLAMA_API int32_t llama_apply_adapter_cvec(
LLAMA_API int32_t llama_set_adapter_cvec(
struct llama_context * ctx,
const float * data,
size_t len,