common : inhibit lazy grammar sampler while reasoning is active (#20970)
* common : inhibit grammar while reasoning budget is active * cont : update force_pos in accept * cont : fix tests * cont : tweak should apply logic * cont : return early not using grammar sampler * Add tests * cont : prevent backend sampling when reasoning budget enabled * cont : fix typo --------- Co-authored-by: Piotr Wilkin <piotr.wilkin@syndatis.com>
This commit is contained in:
parent
ff934e29bc
commit
59d840209a
8 changed files with 295 additions and 106 deletions
|
|
@ -100,7 +100,7 @@ struct cli_context {
|
|||
}
|
||||
|
||||
// reasoning budget sampler
|
||||
if (reasoning_budget >= 0 && !chat_params.thinking_end_tag.empty()) {
|
||||
if (!chat_params.thinking_end_tag.empty()) {
|
||||
const llama_vocab * vocab = llama_model_get_vocab(
|
||||
llama_get_model(ctx_server.get_llama_context()));
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue