llama: fix magic number of 999 for GPU layers (#18266)
* llama: fix magic number of 999 for GPU layers * use strings for -ngl, -ngld * encapsulate n_gpu_layers, split_mode
This commit is contained in:
parent
06705fdcb3
commit
026d2ad472
7 changed files with 46 additions and 22 deletions
|
|
@ -2378,11 +2378,11 @@ void llama_model::load_vocab(llama_model_loader & ml) {
|
|||
|
||||
bool llama_model::load_tensors(llama_model_loader & ml) {
|
||||
const auto & split_mode = params.split_mode;
|
||||
const auto & n_gpu_layers = params.n_gpu_layers;
|
||||
const auto & use_mlock = params.use_mlock;
|
||||
const auto & tensor_split = params.tensor_split;
|
||||
|
||||
const int n_layer = hparams.n_layer;
|
||||
const int n_layer = hparams.n_layer;
|
||||
const int n_gpu_layers = this->n_gpu_layers();
|
||||
|
||||
const bool use_mmap_buffer = true;
|
||||
|
||||
|
|
@ -6884,6 +6884,14 @@ size_t llama_model::n_devices() const {
|
|||
return devices.size();
|
||||
}
|
||||
|
||||
// Resolves the n_gpu_layers setting: returns params.n_gpu_layers when it is
// non-negative, otherwise falls back to hparams.n_layer + 1.
// NOTE(review): the "+ 1" presumably accounts for the output layer on top of
// the repeating layers — confirm against load_tensors.
uint32_t llama_model::n_gpu_layers() const {
|
||||
return params.n_gpu_layers >= 0 ? params.n_gpu_layers : hparams.n_layer + 1;
|
||||
}
|
||||
|
||||
// Accessor: returns the split mode stored in params.split_mode, unchanged.
llama_split_mode llama_model::split_mode() const {
|
||||
return params.split_mode;
|
||||
}
|
||||
|
||||
std::map<ggml_backend_buffer_type_t, size_t> llama_model::memory_breakdown() const {
|
||||
std::map<ggml_backend_buffer_type_t, size_t> ret;
|
||||
for (const auto & [ctx, bufs] : pimpl->ctxs_bufs) {
|
||||
|
|
@ -7794,7 +7802,7 @@ llama_model_params llama_model_default_params() {
|
|||
llama_model_params result = {
|
||||
/*.devices =*/ nullptr,
|
||||
/*.tensor_buft_overrides =*/ nullptr,
|
||||
/*.n_gpu_layers =*/ 999,
|
||||
/*.n_gpu_layers =*/ -1,
|
||||
/*.split_mode =*/ LLAMA_SPLIT_MODE_LAYER,
|
||||
/*.main_gpu =*/ 0,
|
||||
/*.tensor_split =*/ nullptr,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue