arg: clarify auto kvu/np being set on server (#17997)
* arg: clarify auto kvu/np being set on server
* improve docs
* use invalid_argument
This commit is contained in:
parent
a5251ca11d
commit
7b1db3d3b7
6 changed files with 51 additions and 35 deletions
|
|
@@ -73,13 +73,8 @@ int main(int argc, char ** argv, char ** envp) {
|
|||
return 1;
|
||||
}
|
||||
|
||||
// TODO: should we have a separate n_parallel parameter for the server?
|
||||
// https://github.com/ggml-org/llama.cpp/pull/16736#discussion_r2483763177
|
||||
// TODO: this is a common configuration that is suitable for most local use cases
|
||||
// however, overriding the parameters is a bit confusing - figure out something more intuitive
|
||||
if (params.n_parallel == 1 && params.kv_unified == false && !params.has_speculative()) {
|
||||
LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true (add -kvu to disable this)\n", __func__);
|
||||
|
||||
if (params.n_parallel < 0) {
|
||||
LOG_INF("%s: n_parallel is set to auto, using n_parallel = 4 and kv_unified = true\n", __func__);
|
||||
params.n_parallel = 4;
|
||||
params.kv_unified = true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue