server: remove default "gpt-3.5-turbo" model name (#17668)

* server: remove default "gpt-3.5-turbo" model name

* do not reflect back model name from request

* fix test
commit 5d6bd842ea (parent fd3abe849e)
Author: Xuan-Son Nguyen
Date: 2025-12-02 11:38:57 +01:00, committed by GitHub
5 changed files with 40 additions and 18 deletions
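
For readers skimming the diff: the name reported by the API is now resolved once at load time instead of defaulting to "gpt-3.5-turbo" or echoing the client's request. Below is a minimal standalone sketch of that precedence; the resolve_model_name helper and the sample path are illustrative, not part of this commit, but the three inputs mirror params_base.model_alias, params_base.model.name, and params_base.model.path used in the hunks that follow.

    // Sketch (hypothetical helper) of the fallback chain added to server_context_impl:
    // --alias flag, then registry-style name of a cached model, then the file name.
    #include <filesystem>
    #include <iostream>
    #include <string>

    static std::string resolve_model_name(
            const std::string & alias,         // params_base.model_alias (--alias)
            const std::string & registry_name, // params_base.model.name (cached models)
            const std::string & path) {        // params_base.model.path
        if (!alias.empty()) {
            return alias;                      // user explicitly specified model name
        }
        if (!registry_name.empty()) {
            return registry_name;              // registry format, e.g. hf.co/org/model
        }
        // fallback: derive model name from file name
        return std::filesystem::path(path).filename().string();
    }

    int main() {
        // prints "my-model.gguf" (the path is a made-up example)
        std::cout << resolve_model_name("", "", "/models/my-model.gguf") << "\n";
    }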

@@ -17,6 +17,7 @@
 #include <cinttypes>
 #include <memory>
 #include <unordered_set>
+#include <filesystem>
 
 // fix problem with std::min and std::max
 #if defined(_WIN32)
@@ -518,6 +519,8 @@ struct server_context_impl {
     // Necessary similarity of prompt for slot selection
     float slot_prompt_similarity = 0.0f;
 
+    std::string model_name; // name of the loaded model, to be used by API
+
     common_chat_templates_ptr chat_templates;
 
     oaicompat_parser_options oai_parser_opt;
@@ -758,6 +761,18 @@ struct server_context_impl {
         }
         SRV_WRN("%s", "for more info see https://github.com/ggml-org/llama.cpp/pull/16391\n");
 
+        if (!params_base.model_alias.empty()) {
+            // user explicitly specified model name
+            model_name = params_base.model_alias;
+        } else if (!params_base.model.name.empty()) {
+            // use model name in registry format (for models in cache)
+            model_name = params_base.model.name;
+        } else {
+            // fallback: derive model name from file name
+            auto model_path = std::filesystem::path(params_base.model.path);
+            model_name = model_path.filename().string();
+        }
+
         // thinking is enabled if:
         // 1. It's not explicitly disabled (reasoning_budget == 0)
         // 2. The chat template supports it
@@ -2611,7 +2626,7 @@ static std::unique_ptr<server_res_generator> handle_completions_impl(
             // OAI-compat
             task.params.res_type = res_type;
             task.params.oaicompat_cmpl_id = completion_id;
-            // oaicompat_model is already populated by params_from_json_cmpl
+            task.params.oaicompat_model = ctx_server.model_name;
 
             tasks.push_back(std::move(task));
         }
@@ -2939,7 +2954,7 @@ void server_routes::init_routes() {
         json data = {
             { "default_generation_settings", default_generation_settings_for_props },
             { "total_slots", ctx_server.params_base.n_parallel },
-            { "model_alias", ctx_server.params_base.model_alias },
+            { "model_alias", ctx_server.model_name },
             { "model_path", ctx_server.params_base.model.path },
             { "modalities", json {
                 {"vision", ctx_server.oai_parser_opt.allow_image},
@@ -3181,8 +3196,8 @@ void server_routes::init_routes() {
         json models = {
             {"models", {
                 {
-                    {"name", params.model_alias.empty() ? params.model.path : params.model_alias},
-                    {"model", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"name", ctx_server.model_name},
+                    {"model", ctx_server.model_name},
                     {"modified_at", ""},
                     {"size", ""},
                     {"digest", ""}, // dummy value, llama.cpp does not support managing model file's hash
@@ -3204,7 +3219,7 @@
             {"object", "list"},
             {"data", {
                 {
-                    {"id", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"id", ctx_server.model_name},
                     {"object", "model"},
                     {"created", std::time(0)},
                     {"owned_by", "llamacpp"},
@@ -3351,6 +3366,7 @@ void server_routes::init_routes() {
             // write JSON response
             json root = format_response_rerank(
                 body,
+                ctx_server.model_name,
                 responses,
                 is_tei_format,
                 documents,
@@ -3613,7 +3629,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_embeddings_impl(cons
     // write JSON response
     json root = res_type == TASK_RESPONSE_TYPE_OAI_EMBD
-        ? format_embeddings_response_oaicompat(body, responses, use_base64)
+        ? format_embeddings_response_oaicompat(body, ctx_server.model_name, responses, use_base64)
         : json(responses);
 
     res->ok(root);
     return res;
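
With these changes, /props, /api/tags, /v1/models, rerank, and embeddings responses all report the same resolved name, and completions no longer reflect back whatever model string the client sent. As a quick illustration (the model id below is a hypothetical resolved model_name, not from this commit), the /v1/models payload is built with nlohmann::json in the same shape as the hunk above:

    // Standalone sketch of the /v1/models payload after this change.
    #include <nlohmann/json.hpp>
    #include <ctime>
    #include <iostream>

    using json = nlohmann::json;

    int main() {
        json models = {
            {"object", "list"},
            {"data", {
                {
                    {"id", "llama-3.2-1b.gguf"}, // ctx_server.model_name; never "gpt-3.5-turbo"
                    {"object", "model"},
                    {"created", std::time(0)},
                    {"owned_by", "llamacpp"},
                }
            }},
        };
        std::cout << models.dump(2) << std::endl;
    }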