benches : update models + numbers (#19359)

* bench : update script
* benches : update numbers
2026-02-05 14:34:07 +02:00 · 2026-02-05 14:34:07 +02:00 · 3795cc1e89
commit 3795cc1e89
parent b828e18c75
3 changed files with 541 additions and 188 deletions
--- a/scripts/bench-models.sh
+++ b/scripts/bench-models.sh
@@ -7,47 +7,54 @@ ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
 ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"

 QUICK=0
+DIO=0
 while (( "$#" )); do
-  case "$1" in
-    --quick) QUICK=1; shift ;;
-    *) shift ;;
-  esac
+    case "$1" in
+        --quick) QUICK=1; shift ;;
+        --dio) DIO=1; shift ;;
+        *) shift ;;
+    esac
 done

 if (( QUICK )); then
-  ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
-  ARGS_B="-d 0 -p 2048 -n 32"
+    ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
+    ARGS_B="-d 0 -p 2048 -n 32"
+fi
+
+if (( DIO )); then
+    ARGS_BB="${ARGS_BB} --no-mmap --direct-io"
+    ARGS_B="${ARGS_B} -mmp 0 -dio 1"
 fi

 run_model() {
-  local HFR=$1
-  local HFF=$2
+    local HFR=$1
+    local HFF=$2

-  printf "## ${HFR}\n" | tee -a "$RESULTS"
-  printf "\n" | tee -a "$RESULTS"
-  printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
-  printf "\n" | tee -a "$RESULTS"
+    printf "## ${HFR}\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"
+    printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"

-  printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
-  printf "\n" | tee -a "$RESULTS"
+    printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"

-  ./bin/llama-batched-bench \
-    -hfr "${HFR}" -hff "${HFF}" \
-    -m "${HFF}" -fa 1 -ub 2048 --no-mmap \
-    ${ARGS_BB} | tee -a "$RESULTS"
+    ./bin/llama-batched-bench \
+        -hfr "${HFR}" -hff "${HFF}" \
+        -m "${HFF}" -fa 1 -ub 2048 \
+        ${ARGS_BB} | tee -a "$RESULTS"

-  printf "\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"

-  printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
-  printf "\n" | tee -a "$RESULTS"
+    printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"

-  ./bin/llama-bench \
-    -m "${HFF}" -fa 1 -ub 2048 -mmp 0 \
-    ${ARGS_B} | tee -a "$RESULTS"
+    ./bin/llama-bench \
+        -m "${HFF}" -fa 1 -ub 2048 \
+        ${ARGS_B} | tee -a "$RESULTS"

-  printf "\n" | tee -a "$RESULTS"
+    printf "\n" | tee -a "$RESULTS"

-  printf "\n"
+    printf "\n"
 }

 run_model "ggml-org/gpt-oss-20b-GGUF"                       "gpt-oss-20b-mxfp4.gguf"
@@ -55,6 +62,7 @@ run_model "ggml-org/gpt-oss-120b-GGUF"                      "gpt-oss-120b-mxfp4-
 run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
 run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"             "qwen2.5-coder-7b-q8_0.gguf"
 run_model "ggml-org/gemma-3-4b-it-qat-GGUF"                 "gemma-3-4b-it-qat-Q4_0.gguf"
+run_model "ggml-org/GLM-4.7-Flash-GGUF"                     "GLM-4.7-Flash-Q8_0.gguf"

 if [[ -f models-extra.txt ]]; then
    while read -r HFR HFF; do