graph: add f_attn_temp_offset (#18025)

This commit is contained in:
Xuan-Son Nguyen 2025-12-14 13:05:59 +01:00 committed by GitHub
parent 254098a279
commit 0759b09c90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 11 additions and 4 deletions

View file

@ -668,6 +668,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
hparams.n_swa = 8192;
hparams.n_attn_temp_floor_scale = 8192;
hparams.f_attn_temp_scale = 0.1f;
hparams.f_attn_temp_offset = 1.0f;
hparams.set_swa_pattern(4); // pattern: 3 chunked - 1 full
}
@ -1646,6 +1647,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.n_attn_temp_floor_scale, false);
hparams.f_attn_temp_offset = 0.0f;
switch (hparams.n_layer) {
case 27: type = LLM_TYPE_16B; break;
case 60: type = LLM_TYPE_236B; break;
@ -2276,6 +2279,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);
hparams.f_attn_temp_offset = 0.0f;
// TODO: maybe add n_attn_temp_floor_scale as a separate KV?
if (hparams.f_attn_temp_scale != 0.0f) {
hparams.n_attn_temp_floor_scale = hparams.n_ctx_orig_yarn;