spec : remove check rate (#19377)

* spec: remove parameter spec-ngram-check-rate

* spec : renamed statistics vars

* spec : add n_call_begin, n_call_accept

* spec : don't enable key-map-stats
This commit is contained in:
Sascha Rogmann 2026-02-09 14:30:50 +01:00 committed by GitHub
parent 81ddc60cb3
commit 292f6908cd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 36 additions and 62 deletions

View file

@ -80,7 +80,6 @@ json task_params::to_json(bool only_metrics) const {
{"speculative.type", common_speculative_type_to_str(speculative.type)},
{"speculative.ngram_size_n", speculative.ngram_size_n},
{"speculative.ngram_size_m", speculative.ngram_size_m},
{"speculative.ngram_c_rate", speculative.ngram_check_rate},
{"speculative.ngram_m_hits", speculative.ngram_min_hits},
{"timings_per_token", timings_per_token},
{"post_sampling_probs", post_sampling_probs},
@ -144,7 +143,6 @@ json task_params::to_json(bool only_metrics) const {
{"speculative.type", common_speculative_type_to_str(speculative.type)},
{"speculative.ngram_size_n", speculative.ngram_size_n},
{"speculative.ngram_size_m", speculative.ngram_size_m},
{"speculative.ngram_c_rate", speculative.ngram_check_rate},
{"speculative.ngram_m_hits", speculative.ngram_min_hits},
{"timings_per_token", timings_per_token},
{"post_sampling_probs", post_sampling_probs},
@ -257,12 +255,10 @@ task_params server_task::params_from_json_cmpl(
params.speculative.ngram_size_n = json_value(data, "speculative.ngram_size_n", defaults.speculative.ngram_size_n);
params.speculative.ngram_size_m = json_value(data, "speculative.ngram_size_m", defaults.speculative.ngram_size_m);
params.speculative.ngram_check_rate = json_value(data, "speculative.ngram_c_rate", defaults.speculative.ngram_check_rate);
params.speculative.ngram_min_hits = json_value(data, "speculative.ngram_m_hits", defaults.speculative.ngram_min_hits);
params.speculative.ngram_size_n = std::max(std::min(1, (int) params.speculative.ngram_size_n), 1024);
params.speculative.ngram_size_m = std::max(std::min(1, (int) params.speculative.ngram_size_m), 1024);
params.speculative.ngram_check_rate = std::max(std::min(1, (int) params.speculative.ngram_check_rate), 1024);
params.speculative.ngram_min_hits = std::max(std::min(1, (int) params.speculative.ngram_min_hits), 1024);
// Use OpenAI API logprobs only if n_probs wasn't provided