benches : update models + numbers (#19359)
* bench : update script
* benches : update numbers
This commit is contained in:
parent
b828e18c75
commit
3795cc1e89
3 changed files with 541 additions and 188 deletions
60
scripts/bench-models.sh
Normal file → Executable file
60
scripts/bench-models.sh
Normal file → Executable file
|
|
@@ -7,47 +7,54 @@ ARGS_BB="-c 270336 -npp 512,4096,8192 -npl 1,2,4,8,16,32 -ntg 32"
|
|||
ARGS_B="-d 0,4096,8192,16384,32768 -p 2048 -n 32"
|
||||
|
||||
QUICK=0
|
||||
DIO=0
|
||||
while (( "$#" )); do
|
||||
case "$1" in
|
||||
--quick) QUICK=1; shift ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
case "$1" in
|
||||
--quick) QUICK=1; shift ;;
|
||||
--dio) DIO=1; shift ;;
|
||||
*) shift ;;
|
||||
esac
|
||||
done
|
||||
|
||||
if (( QUICK )); then
|
||||
ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
|
||||
ARGS_B="-d 0 -p 2048 -n 32"
|
||||
ARGS_BB="-c 20480 -npp 512,4096 -npl 1,2,4 -ntg 32"
|
||||
ARGS_B="-d 0 -p 2048 -n 32"
|
||||
fi
|
||||
|
||||
if (( DIO )); then
|
||||
ARGS_BB="${ARGS_BB} --no-mmap --direct-io"
|
||||
ARGS_B="${ARGS_B} -mmp 0 -dio 1"
|
||||
fi
|
||||
|
||||
run_model() {
|
||||
local HFR=$1
|
||||
local HFF=$2
|
||||
local HFR=$1
|
||||
local HFF=$2
|
||||
|
||||
printf "## ${HFR}\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf "## ${HFR}\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf "Model: https://huggingface.co/${HFR}\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
|
||||
printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf -- "- \`llama-batched-bench\`\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
|
||||
./bin/llama-batched-bench \
|
||||
-hfr "${HFR}" -hff "${HFF}" \
|
||||
-m "${HFF}" -fa 1 -ub 2048 --no-mmap \
|
||||
${ARGS_BB} | tee -a "$RESULTS"
|
||||
./bin/llama-batched-bench \
|
||||
-hfr "${HFR}" -hff "${HFF}" \
|
||||
-m "${HFF}" -fa 1 -ub 2048 \
|
||||
${ARGS_BB} | tee -a "$RESULTS"
|
||||
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
|
||||
printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf -- "- \`llama-bench\`\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
|
||||
./bin/llama-bench \
|
||||
-m "${HFF}" -fa 1 -ub 2048 -mmp 0 \
|
||||
${ARGS_B} | tee -a "$RESULTS"
|
||||
./bin/llama-bench \
|
||||
-m "${HFF}" -fa 1 -ub 2048 \
|
||||
${ARGS_B} | tee -a "$RESULTS"
|
||||
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
printf "\n" | tee -a "$RESULTS"
|
||||
|
||||
printf "\n"
|
||||
printf "\n"
|
||||
}
|
||||
|
||||
run_model "ggml-org/gpt-oss-20b-GGUF" "gpt-oss-20b-mxfp4.gguf"
|
||||
|
|
@@ -55,6 +62,7 @@ run_model "ggml-org/gpt-oss-120b-GGUF" "gpt-oss-120b-mxfp4-
|
|||
run_model "ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF" "qwen3-coder-30b-a3b-instruct-q8_0.gguf"
|
||||
run_model "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF" "qwen2.5-coder-7b-q8_0.gguf"
|
||||
run_model "ggml-org/gemma-3-4b-it-qat-GGUF" "gemma-3-4b-it-qat-Q4_0.gguf"
|
||||
run_model "ggml-org/GLM-4.7-Flash-GGUF" "GLM-4.7-Flash-Q8_0.gguf"
|
||||
|
||||
if [[ -f models-extra.txt ]]; then
|
||||
while read -r HFR HFF; do
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue