diff --git a/.gear/llama-cpp-turboquant.spec b/.gear/llama-cpp-turboquant.spec
new file mode 100644
index 000000000..b57b20838
--- /dev/null
+++ b/.gear/llama-cpp-turboquant.spec
@@ -0,0 +1,354 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%define _unpackaged_files_terminate_build 1
+%define _stripped_files_terminate_build 1
+%set_verify_elf_method strict
+
+%ifarch x86_64
+%def_with cuda
+%else
+%def_without cuda
+%endif
+%def_with vulkan
+
+Name: llama-cpp-turboquant
+Version: 8659
+Release: alt1
+Epoch: 1
+Summary: LLM inference in C/C++
+License: MIT
+Group: Sciences/Computer science
+# Related site: https://ggml.ai/ (Url below is the upstream llama.cpp repository).
+Url: https://github.com/ggerganov/llama.cpp
+ExcludeArch: %ix86
+Requires: %name-cpu = %EVR
+%if_with cuda
+Requires: %name-cuda = %EVR
+%filter_from_requires /(libcudart\.so\.12)/d
+%filter_from_requires /debug64(libcuda\.so\.1)/d
+%endif
+%if_with vulkan
+Requires: %name-vulkan = %EVR
+%endif
+
+Source: %name-%version.tar
+Patch: %name-%version.patch
+
+BuildRequires(pre): rpm-macros-cmake
+BuildRequires: cmake
+BuildRequires: gcc-c++
+BuildRequires: help2man
+BuildRequires: libcurl-devel
+BuildRequires: libgomp-devel
+BuildRequires: libssl-devel
+BuildRequires: libstdc++-devel-static
+%if_with cuda
+BuildRequires: gcc12-c++
+BuildRequires: nvidia-cuda-devel-static
+%endif
+%if_with vulkan
+BuildRequires: glslc
+BuildRequires: libvulkan-devel
+%endif
+%{?!_without_check:%{?!_disable_check:
+BuildRequires: ctest
+BuildRequires: python3-module-jinja2
+BuildRequires: tinyllamas-gguf
+}}
+
+%description
+Plain C/C++ implementation (of inference of many LLM models) without
+dependencies. AVX, AVX2, AVX512, and AMX support for x86 architectures.
+Mixed F16/F32 precision. 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and
+8-bit integer quantization for faster inference and reduced memory use.
+Supports CPU, GPU, and hybrid CPU+GPU inference.
+
+Supported models:
+
+  LLaMA models, Mistral 7B, Mixtral MoE, Falcon, Chinese LLaMA /
+  Alpaca and Chinese LLaMA-2 / Alpaca-2, Vigogne (French), Koala,
+  Baichuan 1 & 2 + derivations, Aquila 1 & 2, Starcoder models, Refact,
+  Persimmon 8B, MPT, Bloom, Yi models, StableLM models, Deepseek models,
+  Qwen models, PLaMo-13B, Phi models, GPT-2, Orion 14B, InternLM2,
+  CodeShell, Gemma, Mamba, Grok-1, Xverse, Command-R models, SEA-LION,
+  GritLM-7B + GritLM-8x7B, OLMo, GPT-NeoX + Pythia, Snowflake-Arctic
+  MoE, Smaug, Poro 34B, Bitnet b1.58 models, Flan T5, Open Elm models,
+  ChatGLM3-6b + ChatGLM4-9b + GLMEdge-1.5b + GLMEdge-4b, SmolLM,
+  EXAONE-3.0-7.8B-Instruct, FalconMamba Models, Jais, Bielik-11B-v2.3,
+  RWKV-6, QRWKV-6, GigaChat-20B-A3B, Trillion-7B-preview, Ling models,
+  LFM2 models, Hunyuan models, BailingMoeV2 (Ring/Ling 2.0) models
+
+Multimodal models:
+
+  LLaVA 1.5 models, BakLLaVA, Obsidian, ShareGPT4V, MobileVLM 1.7B/3B
+  models, Yi-VL, Mini CPM, Moondream, Bunny, GLM-EDGE, Qwen2-VL,
+  LFM2-VL
+
+NOTE:
+  MODELS ARE NOT PROVIDED. You'll need to download them from the original
+  sites (or Hugging Face Hub).
+
+Overall this is all raw and EXPERIMENTAL, no warranty, no support.
+
+%package -n libllama-turboquant
+Summary: Shared libraries for llama-cpp-turboquant
+Group: System/Libraries
+
+%description -n libllama-turboquant
+%summary.
+
+%package -n libllama-turboquant-devel
+Summary: Development files for llama.cpp
+Group: Development/C
+Requires: libllama-turboquant = %EVR
+Conflicts: libwhisper-cpp-devel
+
+%description -n libllama-turboquant-devel
+%summary.
+
+%package cpu
+Summary: %name tools including backend for CPU
+Group: Sciences/Computer science
+Requires: libllama-turboquant = %EVR
+Conflicts: %name-convert < %EVR
+AutoReqProv: nopython3
+%add_findreq_skiplist %_datadir/%name/examples/*
+
+%description cpu
+%summary.
+
+%package cuda
+Summary: %name backend for NVIDIA GPU
+Group: Sciences/Computer science
+Requires: libnvidia-ptxjitcompiler
+Requires: %name-cpu = %EVR
+
+%description cuda
+%summary.
+
+%package vulkan
+Summary: %name backend for GPU
+Group: Sciences/Computer science
+Requires: %name-cpu = %EVR
+
+%description vulkan
+%summary.
+
+%prep
+%setup
+%autopatch -p1
+commit=$(awk '$2=="b%version"{print$1}' .gear/tags/list)
+cat <<-EOF >> cmake/build-info.cmake
+	set(BUILD_NUMBER %version)
+	set(GGML_BUILD_NUMBER %version)
+	set(BUILD_COMMIT "${commit::8} [%release]")
+EOF
+sed -i '/POSITION_INDEPENDENT_CODE/s/PROPERTIES/& SOVERSION 0.0.%version/' src/CMakeLists.txt
+sed -i 's/POSITION_INDEPENDENT_CODE/SOVERSION 0.0.%version &/' tools/mtmd/CMakeLists.txt
+# No Internet access during the build (issues/13371): drop the paragraphs mentioning _URL.
+perl -00 -ni -e 'print unless /_URL/' tests/test-arg-parser.cpp
+# This test requires GPU.
+sed /test-thread-safety/d -i tests/CMakeLists.txt
+
+%build
+%define optflags_debug -g1
+export NVCC_PREPEND_FLAGS=-ccbin=g++-12
+# Unless -DCMAKE_SKIP_BUILD_RPATH=yes CMake fails to strip build time RPATH
+# from (installed) binaries.
+%cmake \
+	-DCMAKE_SKIP_BUILD_RPATH=yes \
+	-DLLAMA_BUILD_TESTS=ON \
+	-DGGML_BACKEND_DL=ON \
+	-DGGML_BACKEND_DIR=%_libexecdir/llama-tq \
+	-DGGML_CPU=ON \
+	-DGGML_RPC=ON \
+%ifarch x86_64
+	-DGGML_CPU_ALL_VARIANTS=ON \
+%endif
+%if_with cuda
+	-DGGML_CUDA=ON \
+	-DCMAKE_CUDA_ARCHITECTURES='52-virtual;80-virtual' \
+%endif
+%if_with vulkan
+	-DGGML_VULKAN=ON \
+%endif
+	%nil
+grep -E 'LLAMA|GGML' %_cmake__builddir/CMakeCache.txt | sort | tee build-options.txt
+%cmake_build
+find -name '*.py' -exec sed -i '1s|#!/usr/bin/env python3|#!%__python3|' {} +
+LD_LIBRARY_PATH=%_cmake__builddir/bin %_cmake__builddir/bin/llama-tq-server --completion-bash > llama-tq.bash
+LD_LIBRARY_PATH=%_cmake__builddir/bin .gear/gen-manpage %_cmake__builddir/bin/llama-tq-server > llama-tq-server.1
+
+%install
+%cmake_install
+# Python requirements files.
+install -Dpm644 requirements.txt -t %buildroot%_datadir/%name
+cp -a requirements -t %buildroot%_datadir/%name
+# Additional data.
+cp -rp grammars -t %buildroot%_datadir/%name
+# Not all examples.
+install -Dp examples/*.sh -t %buildroot%_datadir/%name/examples
+install -Dp examples/*.py -t %buildroot%_datadir/%name/examples
+# We need to run the tests, not install them.
+rm %buildroot%_bindir/test-*
+rm %buildroot%_bindir/export-graph-ops
+# Completions: install the generated llama-tq.bash; the other tools symlink to it.
+install -Dpm644 llama-tq.bash %buildroot%_datadir/bash-completion/completions/llama-tq-server
+printf '%%s\n' llama-tq-cli llama-tq-simple llama-tq-run llama-tq-mtmd-cli |
+	xargs -ti ln -s llama-tq-server %buildroot%_datadir/bash-completion/completions/{}
+mv %buildroot%_bindir/rpc-server %buildroot%_bindir/llama-tq-rpc-server
+install -Dpm644 llama-tq-server.1 -t %buildroot%_man1dir
+
+%check
+( ! cuobjdump --list-elf %buildroot%_libexecdir/llama-tq/libggml-cuda.so | grep -F -v -e .cubin )
+( ! cuobjdump --list-ptx %buildroot%_libexecdir/llama-tq/libggml-cuda.so | grep -F -v -e .sm_80.ptx -e .sm_52.ptx )
+# Local paths are more useful for debugging because they are not stripped by default.
+export LD_LIBRARY_PATH=$PWD/%_cmake__builddir/bin PATH+=:$PWD/%_cmake__builddir/bin
+llama-tq-server --version
+llama-tq-server --version |& grep -Ex 'version: %version \(\S+ \[%release\]\)'
+# test-eval-callback wants network.
+%ctest -E 'test-download-model|test-eval-callback|test-state-restore-fragmented|test-llama-archs'
+# Disabled: these completion smoke tests broke with this build. TODO: fix and re-enable.
+#llama-completion -m /usr/share/tinyllamas/stories260K.gguf -p "Hello" -s 42 -n 500 2>/dev/null
+#llama-completion -m /usr/share/tinyllamas/stories260K.gguf -p "Once upon a time" -s 55 -n 33 2>/dev/null |
+# grep 'Once upon a time, there was a boy named Tom. Tom had a big box of colors.'
+# We do not provide convert tools.
+mv %buildroot%_bindir/convert*.py -t %buildroot%_datadir/%name/examples
+
+%files
+
+%files -n libllama-turboquant
+%_libdir/libllama-tq.so.0.0.%version
+%_libdir/libggml-tq.so.0
+%_libdir/libggml-tq.so.0.*
+%_libdir/libggml-tq-base.so.0
+%_libdir/libggml-tq-base.so.0.*
+%_libdir/libmtmd-tq.so.0.0.%version
+
+%files -n libllama-turboquant-devel
+%_libdir/libllama-tq.so
+%_libdir/libggml-tq.so
+%_libdir/libggml-tq-base.so
+%_libdir/libmtmd-tq.so
+%_includedir/llama-tq*.h
+%_includedir/gguf-tq.h
+%_includedir/ggml-tq*.h
+%_includedir/mtmd-tq*.h
+%_cmakedir/ggml-tq
+%_cmakedir/llama-tq
+%_pkgconfigdir/llama-tq.pc
+
+%files cpu
+%define _customdocdir %_docdir/%name
+%doc LICENSE README.md docs build-options.txt
+%_bindir/llama-tq-*
+%dir %_datadir/%name
+%dir %_datadir/%name/examples
+%_datadir/%name/examples/*.sh
+%_datadir/%name/examples/*.py
+%_datadir/%name/requirements*
+%_datadir/%name/grammars
+%dir %_libexecdir/llama-tq
+%_libexecdir/llama-tq/libggml-cpu*.so
+%_libexecdir/llama-tq/libggml-rpc.so
+%_datadir/bash-completion/completions/llama-tq-*
+%_man1dir/llama-tq-server.1*
+
+%if_with cuda
+%files cuda
+%dir %_libexecdir/llama-tq
+%_libexecdir/llama-tq/libggml-cuda.so
+%endif
+
+%if_with vulkan
+%files vulkan
+%dir %_libexecdir/llama-tq
+%_libexecdir/llama-tq/libggml-vulkan.so
+%endif
+
+%changelog
+* Tue Apr 07 2026 Arseniy Romenskiy 1:8659-alt1
+- Update to b8659 (2026-04-07).
+
+* Mon Apr 06 2026 Vitaly Chikunov 1:8681-alt1
+- Update to b8681 (2026-04-06).
+
+* Sun Mar 22 2026 Vitaly Chikunov 1:8470-alt1
+- Update to b8470 (2026-03-22).
+
+* Tue Mar 03 2026 Vitaly Chikunov 1:8192-alt1
+- Update to b8192 (2026-03-03).
+
+* Fri Feb 13 2026 Vitaly Chikunov 1:8018-alt1
+- Update to b8018 (2026-02-12).
+
+* Sat Jan 24 2026 Vitaly Chikunov 1:7819-alt1
+- Update to b7819 (2026-01-23).
+- Responses API support (partial).
+
+* Sun Dec 14 2025 Vitaly Chikunov 1:7388-alt1
+- Update to b7388 (2025-12-13).
+- llama-cli: New CLI experience (with the old moved to llama-completion). +- llama-server: Live model switching. +- Messages API support. + +* Fri Nov 21 2025 Vitaly Chikunov 1:7127-alt1 +- Update to b7127 (2025-11-21). +- spec: Remove llama.cpp-convert package. +- model: detect GigaChat3-10-A1.8B as deepseek lite. + +* Tue Oct 28 2025 Vitaly Chikunov 1:6869-alt1 +- Update to b6869 (2025-10-28). + +* Sat Sep 06 2025 Vitaly Chikunov 1:6397-alt1 +- Update to b6397 (2025-09-06). +- Python-based model conversion scripts are sub-packaged. Note that they are + not supported and are provided as-is. + +* Sat Aug 09 2025 Vitaly Chikunov 1:6121-alt1 +- Update to b6121 (2025-08-08). + +* Wed Jun 25 2025 Vitaly Chikunov 1:5753-alt1 +- Update to b5753 (2025-06-24). +- Install an experimental rpc backend and server. The rpc code is a + proof-of-concept, fragile, and insecure. + +* Sat May 10 2025 Vitaly Chikunov 1:5332-alt1 +- Update to b5332 (2025-05-09), with vision support in llama-server. +- Enable Vulkan backend (for GPU) in llama.cpp-vulkan package. + +* Mon Mar 10 2025 Vitaly Chikunov 1:4855-alt1 +- Update to b4855 (2025-03-07). +- Enable CUDA backend (for NVIDIA GPU) in llama.cpp-cuda package. +- Disable BLAS backend (issues/12282). +- Install bash-completions. + +* Tue Jul 23 2024 Vitaly Chikunov 1:3441-alt1 +- Update to b3441 (2024-07-23). +- spec: Package libllama and libllama-devel (ALT#50962). +- spec: Use upstream install procedure; as a consequence, some binary names are + changed. + +* Mon Jun 03 2024 Vitaly Chikunov 1:3072-alt1.20240603 +- Update to b3072 (2024-06-03). +- The version scheme now matches the upstream build number more closely, + instead of using the commit date. +- Build with libcurl and OpenBLAS support. + +* Tue May 28 2024 Vitaly Chikunov 20240527-alt1 +- Update to b3012 (2024-05-27). + +* Mon Feb 26 2024 Vitaly Chikunov 20240225-alt1 +- Update to b2259 (2024-02-25). 
+
+* Fri Oct 20 2023 Vitaly Chikunov 20231019-alt1
+- Update to b1400 (2023-10-19).
+- Install experimental converters (convert- prefixed tools).
+
+* Sun Jul 30 2023 Vitaly Chikunov 20230728-alt1
+- Update to master-8a88e58 (2023-07-28).
+
+* Sun May 14 2023 Vitaly Chikunov 20230513-alt1
+- Build master-bda4d7c (2023-05-13).
+
+* Wed Apr 19 2023 Vitaly Chikunov 20230419-alt1
+- Build master-6667401 (2023-04-19).
diff --git a/.gear/rules b/.gear/rules
index fe7208055..99dd28d7b 100644
--- a/.gear/rules
+++ b/.gear/rules
@@ -2,4 +2,4 @@
 # for version output.
 tar: b@version@:.
 diff: b@version@:. . name=@name@-@version@.patch
-spec: .gear/llama.cpp.spec
+spec: .gear/llama-cpp-turboquant.spec
diff --git a/.gear/tags/list b/.gear/tags/list
index fc62b67c5..63179e6ef 100644
--- a/.gear/tags/list
+++ b/.gear/tags/list
@@ -1 +1 @@
-506200cf8b5c8419ce97d16dc8c50f4634e21ebe b8681
+e43970099269b5b6da36b8977ad47697602e4e54 b8659