llama: fix magic number of 999 for GPU layers (#18266)

* llama: fix magic number of 999 for GPU layers

* use strings for -ngl, -ngld

* enacapsulate n_gpu_layers, split_mode
This commit is contained in:
Johannes Gäßler 2025-12-27 20:18:35 +01:00 committed by GitHub
parent 06705fdcb3
commit 026d2ad472
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 46 additions and 22 deletions

View file

@ -2378,11 +2378,11 @@ void llama_model::load_vocab(llama_model_loader & ml) {
bool llama_model::load_tensors(llama_model_loader & ml) {
const auto & split_mode = params.split_mode;
const auto & n_gpu_layers = params.n_gpu_layers;
const auto & use_mlock = params.use_mlock;
const auto & tensor_split = params.tensor_split;
const int n_layer = hparams.n_layer;
const int n_layer = hparams.n_layer;
const int n_gpu_layers = this->n_gpu_layers();
const bool use_mmap_buffer = true;
@ -6884,6 +6884,14 @@ size_t llama_model::n_devices() const {
return devices.size();
}
uint32_t llama_model::n_gpu_layers() const {
return params.n_gpu_layers >= 0 ? params.n_gpu_layers : hparams.n_layer + 1;
}
llama_split_mode llama_model::split_mode() const {
return params.split_mode;
}
std::map<ggml_backend_buffer_type_t, size_t> llama_model::memory_breakdown() const {
std::map<ggml_backend_buffer_type_t, size_t> ret;
for (const auto & [ctx, bufs] : pimpl->ctxs_bufs) {
@ -7794,7 +7802,7 @@ llama_model_params llama_model_default_params() {
llama_model_params result = {
/*.devices =*/ nullptr,
/*.tensor_buft_overrides =*/ nullptr,
/*.n_gpu_layers =*/ 999,
/*.n_gpu_layers =*/ -1,
/*.split_mode =*/ LLAMA_SPLIT_MODE_LAYER,
/*.main_gpu =*/ 0,
/*.tensor_split =*/ nullptr,