model: support GLM4V vision encoder (#18042)
* convert ok
* no deepstack
* less new tensors
* cgraph ok
* add mrope for text model
* faster patch merger
* add GGML_ROPE_TYPE_MRNORM
* add support for metal
* move glm4v to dedicated graph
* convert: add norm_embd
* clip: add debugging fn
* working correctly
* fix style
* use bicubic
* fix mrope metal
* improve cpu
* convert to neox ordering on conversion
* revert backend changes
* force stop if using old weight
* support moe variant
* fix conversion
* fix convert (2)
* Update tools/mtmd/clip-graph.h

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* process mrope_section on TextModel base class
* resolve conflict merge

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
9963b81f63
commit
3d86c6c2b5
17 changed files with 412 additions and 79 deletions
|
|
@ -1689,7 +1689,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
} break;
|
||||
case LLM_ARCH_GLM4:
|
||||
{
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_sections, 4, false);
|
||||
switch (hparams.n_layer) {
|
||||
case 40: type = LLM_TYPE_9B; break;
|
||||
case 61: type = LLM_TYPE_32B; break;
|
||||
|
|
@ -1698,8 +1699,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
} break;
|
||||
case LLM_ARCH_GLM4_MOE:
|
||||
{
|
||||
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
|
||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||
ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_sections, 4, false);
|
||||
|
||||
// MoE parameters
|
||||
ml.get_key(LLM_KV_EXPERT_COUNT, hparams.n_expert);
|
||||
|
|
@ -7792,7 +7794,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
|
|||
case LLM_ARCH_DEEPSEEK2:
|
||||
case LLM_ARCH_PLM:
|
||||
case LLM_ARCH_CHATGLM:
|
||||
case LLM_ARCH_GLM4:
|
||||
case LLM_ARCH_GRANITE:
|
||||
case LLM_ARCH_GRANITE_MOE:
|
||||
case LLM_ARCH_GRANITE_HYBRID:
|
||||
|
|
@ -7854,7 +7855,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
|
|||
case LLM_ARCH_LFM2:
|
||||
case LLM_ARCH_LFM2MOE:
|
||||
case LLM_ARCH_SMALLTHINKER:
|
||||
case LLM_ARCH_GLM4_MOE:
|
||||
case LLM_ARCH_SEED_OSS:
|
||||
case LLM_ARCH_GROVEMOE:
|
||||
case LLM_ARCH_APERTUS:
|
||||
|
|
@ -7871,6 +7871,11 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
|
|||
case LLM_ARCH_QWEN3VLMOE:
|
||||
return LLAMA_ROPE_TYPE_IMROPE;
|
||||
|
||||
case LLM_ARCH_GLM4:
|
||||
return model->hparams.use_mrope() ? LLAMA_ROPE_TYPE_MROPE : LLAMA_ROPE_TYPE_NORM;
|
||||
case LLM_ARCH_GLM4_MOE:
|
||||
return model->hparams.use_mrope() ? LLAMA_ROPE_TYPE_MROPE : LLAMA_ROPE_TYPE_NEOX;
|
||||
|
||||
// all model arches should be listed explicitly here
|
||||
case LLM_ARCH_UNKNOWN:
|
||||
GGML_ABORT("unknown architecture");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue