llama: end-to-end tests (#19802)
* tests: add end-to-end tests per model architecture
* fixup for rebase
* fix use-after-free in llama-model-loader.cpp
* fix CI
* fix WebGPU
* fix CI
* disable CI for macOS-latest-cmake-arm64
* use expert_weights_scale only if != 0.0f
* comments
This commit is contained in:
parent
a95047979a
commit
a976ff081b
33 changed files with 1607 additions and 633 deletions
|
|
@@ -30,6 +30,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba_layer(llm_graph_input_rs * inp,
|
|||
GGML_ASSERT(n_seqs != 0);
|
||||
GGML_ASSERT(ubatch.equal_seqs());
|
||||
GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs);
|
||||
GGML_ASSERT(d_inner % n_head == 0);
|
||||
|
||||
ggml_tensor * conv_states_all = mctx_cur->get_r_l(il);
|
||||
ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il);
|
||||
|
|
@@ -154,6 +155,7 @@ ggml_tensor * llm_build_mamba_base::build_mamba2_layer(llm_graph_input_rs * inp,
|
|||
|
||||
const auto kv_head = mctx_cur->get_head();
|
||||
|
||||
const int64_t n_embd = hparams.n_embd;
|
||||
const int64_t d_conv = hparams.ssm_d_conv;
|
||||
const int64_t d_inner = hparams.ssm_d_inner;
|
||||
const int64_t d_state = hparams.ssm_d_state;
|
||||
|
|
@@ -167,6 +169,8 @@ ggml_tensor * llm_build_mamba_base::build_mamba2_layer(llm_graph_input_rs * inp,
|
|||
GGML_ASSERT(n_seqs != 0);
|
||||
GGML_ASSERT(ubatch.equal_seqs());
|
||||
GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs);
|
||||
GGML_ASSERT(d_inner % n_head == 0);
|
||||
GGML_ASSERT(d_inner % (n_group*n_embd) == 0);
|
||||
|
||||
ggml_tensor * conv_states_all = mctx_cur->get_r_l(il);
|
||||
ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue