From be3acdbeaac6567dfa57816a8d7acc96d5f11289 Mon Sep 17 00:00:00 2001
From: Vitaly Chikunov <vt@altlinux.org>
Date: Mon, 26 Feb 2024 01:19:27 +0300
Subject: [PATCH] 20240225-alt1

- Update to b2259 (2024-02-25).
---
 .gear/llama.cpp.spec | 75 +++++++++++++++++++++++++++++---------------
 1 file changed, 50 insertions(+), 25 deletions(-)

diff --git a/.gear/llama.cpp.spec b/.gear/llama.cpp.spec
index 3709b335e..c482aec1c 100644
--- a/.gear/llama.cpp.spec
+++ b/.gear/llama.cpp.spec
@@ -4,7 +4,7 @@
 %set_verify_elf_method strict
 
 Name: llama.cpp
-Version: 20231019
+Version: 20240225
 Release: alt1
 Summary: Inference of LLaMA model in pure C/C++
 License: MIT
@@ -30,28 +30,23 @@ BuildRequires: gcc-c++
 
 %description
 Plain C/C++ implementation (of inference of LLaMA model) without
 dependencies. AVX, AVX2 and AVX512 support for x86 architectures.
-Mixed F16/F32 precision. 2-bit, 3-bit, 4-bit, 5-bit, 6-bit and 8-bit
-integer quantization support. Runs on the CPU. Supported models:
+Mixed F16/F32 precision. 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and
+8-bit integer quantization for faster inference and reduced memory use.
+Runs on the CPU.
 
- LLaMA
- LLaMA 2
- Alpaca
- GPT4All
- Chinese LLaMA / Alpaca
- Vigogne (French)
- Vicuna
- Koala
- OpenBuddy (Multilingual)
- Pygmalion / Metharme
- WizardLM
- Baichuan 1 & 2 + derivations
- Aquila 1 & 2
- Starcoder models
- Mistral AI v0.1
- Refact
- Persimmon 8B
- MPT
- Bloom
+Supported models:
+
+ LLaMA, LLaMA 2, Mistral 7B, Mixtral MoE, Falcon, Chinese LLaMA /
+ Alpaca and Chinese LLaMA-2 / Alpaca-2, Vigogne (French), Koala,
+ Baichuan 1 & 2 + derivations, Aquila 1 & 2, Starcoder models, Refact,
+ Persimmon 8B, MPT, Bloom, Yi models, StableLM models, Deepseek models,
+ Qwen models, PLaMo-13B, Phi models, GPT-2, Orion 14B, InternLM2,
+ CodeShell, Gemma
+
+Multimodal models:
+
+ LLaVA 1.5 models, BakLLaVA, Obsidian, ShareGPT4V, MobileVLM 1.7B/3B
+ models, Yi-VL
 
 NOTE 1: You will need to:
@@ -65,7 +60,7 @@ NOTE 2:
 
 For example, LLaMA downloaded via public torrent link is 220 GB.
 
-Overall this is all raw and experimental, no warranty, no support.
+Overall this is all raw and EXPERIMENTAL, no warranty, no support.
 
 %prep
 %setup
@@ -90,20 +85,50 @@ cp -rp grammars -t %buildroot%_datadir/%name
 install -Dp examples/*.sh -t %buildroot%_datadir/%name/examples
 # Install and rename binaries to have llama- prefix.
 cd %_cmake__builddir/bin
-find -maxdepth 1 -type f -executable -printf '%f\0' |
+find -maxdepth 1 -type f -executable -not -name 'test-*' -printf '%f\0' |
 	xargs -0ti -n1 install -p {} %buildroot%_bindir/llama-{}
+mkdir -p %buildroot%_unitdir
+cat <<'EOF' >%buildroot%_unitdir/llama.service
+[Unit]
+Description=Llama.cpp server, CPU only (no GPU support in this build).
+After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
+
+[Service]
+Type=simple
+DynamicUser=true
+EnvironmentFile=%_sysconfdir/sysconfig/llama
+ExecStart=%_bindir/llama-server $LLAMA_ARGS
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=no
+
+[Install]
+WantedBy=default.target
+EOF
+
+mkdir -p %buildroot%_sysconfdir/sysconfig
+cat <<'EOF' >%buildroot%_sysconfdir/sysconfig/llama
+# Change to an accessible path with a model.
+LLAMA_ARGS="-m %_datadir/%name/ggml-model-f32.bin"
+EOF
+
 
 %check
 %cmake_build --target test
 
 %files
 %define _customdocdir %_docdir/%name
-%doc LICENSE README.md SHA256SUMS docs
+%doc LICENSE README.md docs
 %_bindir/llama-*
 %_bindir/convert-*.py
+%_unitdir/llama.service
+%_sysconfdir/sysconfig/llama
+
 %_datadir/%name
 
 %changelog
+* Mon Feb 26 2024 Vitaly Chikunov <vt@altlinux.org> 20240225-alt1
+- Update to b2259 (2024-02-25).
+
 * Fri Oct 20 2023 Vitaly Chikunov <vt@altlinux.org> 20231019-alt1
 - Update to b1400 (2023-10-19).
 - Install experimental converters (convert- prefixed tools).