llama-cpp-turboquant/scripts/snapdragon/adb/run-cli.sh
nullname ed75977717
ggml-hexagon: create generalized functions for cpu side op (#17500)
* refactor: replace ggml_hexagon_mul_mat with template-based binary operation for improved flexibility

* refactor: replace ggml_hexagon_mul_mat_id with template-based binary operation for improved flexibility

* refactor: initialize buffer types and streamline dspqueue_buffers_init calls for clarity

* add comment

* refactor: remove redundant buffer checks in hexagon supported operations

* wip

* add missing include to fix weak symbol warning

* add ggml_hexagon_op_generic

* refactor: simplify tensor operation initialization and buffer management in hexagon implementation

* refactor: streamline hexagon operation initialization and buffer management

* refactor: update function signatures and streamline request handling in hexagon operations

* wip

* ggml-hexagon: clean up code formatting and improve unary operation handling

* wip

* rename

* fix: add support for permuted F16 tensors and enhance quantization checks in matrix operations

* refactor: replace ggml_hexagon_mul_mat with template-based binary operation for improved flexibility

refactor: replace ggml_hexagon_mul_mat_id with template-based binary operation for improved flexibility

refactor: initialize buffer types and streamline dspqueue_buffers_init calls for clarity

refactor: remove redundant buffer checks in hexagon supported operations

add missing include to fix weak symbol warning

add ggml_hexagon_op_generic

refactor: simplify tensor operation initialization and buffer management in hexagon implementation

refactor: streamline hexagon operation initialization and buffer management

refactor: update function signatures and streamline request handling in hexagon operations

ggml-hexagon: clean up code formatting and improve unary operation handling

fix: add support for permuted F16 tensors and enhance quantization checks in matrix operations

# Conflicts:
#	ggml/src/ggml-hexagon/ggml-hexagon.cpp

* hexagon: fix merge conflicts

* hexagon: minor cleanup for buffer support checks

* hexagon: factor out op_desc and the overal op logging

* hexagon: further simplify and cleanup op dispatch logic

* snapdragon: update adb scripts to use llama-cli and llama-completion

* fix pipeline failure

---------

Co-authored-by: Max Krasnyansky <maxk@qti.qualcomm.com>
2025-12-22 23:13:24 -08:00

53 lines
1.3 KiB
Bash
Executable file

#!/bin/sh
#
# Basedir on device
basedir=/data/local/tmp/llama.cpp
cli_opts=
branch=.
[ "$B" != "" ] && branch=$B
adbserial=
[ "$S" != "" ] && adbserial="-s $S"
model="Llama-3.2-3B-Instruct-Q4_0.gguf"
[ "$M" != "" ] && model="$M"
device="HTP0"
[ "$D" != "" ] && device="$D"
experimental=
[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
verbose=
[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
sched=
[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
profile=
[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
opmask=
[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
nhvx=
[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
ndev=
[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
set -x
adb $adbserial shell " \
cd $basedir; ulimit -c unlimited; \
LD_LIBRARY_PATH=$basedir/$branch/lib \
ADSP_LIBRARY_PATH=$basedir/$branch/lib \
$verbose $experimental $sched $opmask $profile $nhvx $ndev \
./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
--poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
--ctx-size 8192 --batch-size 128 -fa on \
-ngl 99 --device $device $cli_opts $@ \
"