graph : add optional scale parameter to build_lora_mm [no ci] (#20427)
This commit is contained in:
parent
4a748b8f15
commit
1eea6a2968
6 changed files with 23 additions and 59 deletions
|
|
@ -29,10 +29,7 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
|
|||
// self-attention
|
||||
{
|
||||
// compute Q and K and RoPE them
|
||||
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur);
|
||||
if (model.layers[il].wq_s) {
|
||||
Qcur = ggml_mul(ctx0, Qcur, model.layers[il].wq_s);
|
||||
}
|
||||
ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur, model.layers[il].wq_s);
|
||||
cb(Qcur, "Qcur", il);
|
||||
if (model.layers[il].bq) {
|
||||
Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq);
|
||||
|
|
@ -40,10 +37,7 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
|
|||
}
|
||||
|
||||
// B1.K
|
||||
ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur);
|
||||
if (model.layers[il].wk_s) {
|
||||
Kcur = ggml_mul(ctx0, Kcur, model.layers[il].wk_s);
|
||||
}
|
||||
ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur, model.layers[il].wk_s);
|
||||
cb(Kcur, "Kcur", il);
|
||||
if (model.layers[il].bk) {
|
||||
Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk);
|
||||
|
|
@ -51,10 +45,7 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
|
|||
}
|
||||
|
||||
// B1.V
|
||||
ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur);
|
||||
if (model.layers[il].wv_s) {
|
||||
Vcur = ggml_mul(ctx0, Vcur, model.layers[il].wv_s);
|
||||
}
|
||||
ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur, model.layers[il].wv_s);
|
||||
cb(Vcur, "Vcur", il);
|
||||
if (model.layers[il].bv) {
|
||||
Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv);
|
||||
|
|
@ -90,10 +81,7 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
|
|||
LLM_NORM_RMS, il);
|
||||
cb(cur, "attn_sub_norm", il);
|
||||
|
||||
cur = build_lora_mm(model.layers[il].wo, cur);
|
||||
if (model.layers[il].wo_s) {
|
||||
cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
|
||||
}
|
||||
cur = build_lora_mm(model.layers[il].wo, cur, model.layers[il].wo_s);
|
||||
if (model.layers[il].bo) {
|
||||
cur = ggml_add(ctx0, cur, model.layers[il].bo);
|
||||
}
|
||||
|
|
@ -127,10 +115,7 @@ llm_build_bitnet::llm_build_bitnet(const llama_model & model, const llm_graph_pa
|
|||
LLM_NORM_RMS, il);
|
||||
cb(cur, "ffn_sub_norm", il);
|
||||
|
||||
cur = build_lora_mm(model.layers[il].ffn_down, cur);
|
||||
if (model.layers[il].ffn_down_s) {
|
||||
cur = ggml_mul(ctx0, cur, model.layers[il].ffn_down_s);
|
||||
}
|
||||
cur = build_lora_mm(model.layers[il].ffn_down, cur, model.layers[il].ffn_down_s);
|
||||
cb(cur, "ffn_down", il);
|
||||
|
||||
cur = ggml_add(ctx0, cur, ffn_inp);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue