common/parser: handle reasoning budget (#20297)

* v1

* Finished!

* Handlie cli

* Reasoning sampler

* Apply suggestions from code review

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Less explosive terminology :)

* Add utf-8 case and tests

* common : migrate reasoning budget sampler to common

* cont : clean up

* cont : expose state and allow passing as initial state

* cont : remove unused imports

* cont : update state machine doc string

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: Alde Rojas <hello@alde.dev>
This commit is contained in:
Piotr Wilkin (ilintar) 2026-03-11 10:26:12 +01:00 committed by GitHub
parent 5f91b1d5d5
commit acb7c79069
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 670 additions and 10 deletions

View file

@ -1101,6 +1101,22 @@ json oaicompat_chat_params_parse(
llama_params["chat_parser"] = chat_params.parser;
}
// Reasoning budget: pass parameters through to sampling layer
{
int reasoning_budget = opt.reasoning_budget;
if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {
reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
}
if (reasoning_budget >= 0 && !chat_params.thinking_end_tag.empty()) {
llama_params["reasoning_budget_tokens"] = reasoning_budget;
llama_params["reasoning_budget_start_tag"] = chat_params.thinking_start_tag;
llama_params["reasoning_budget_end_tag"] = chat_params.thinking_end_tag;
llama_params["reasoning_budget_message"] = opt.reasoning_budget_message;
llama_params["reasoning_budget_activate_immediately"] = chat_params.thinking_forced_open;
}
}
// Handle "logprobs" field
// TODO: The response format of this option is not yet OAI-compatible, but seems like no one really using it; We may need to fix it in the future
if (json_value(body, "logprobs", false)) {