llama: end-to-end tests (#19802)

* tests: add end-to-end tests per model architecture
* fixup for rebase
* fix use-after-free in llama-model-loader.cpp
* fix CI
* fix WebGPU
* fix CI
* disable CI for macOS-latest-cmake-arm64
* use expert_weights_scale only if != 0.0f
* comments
2026-03-08 12:30:21 +01:00 · 2026-03-08 12:30:21 +01:00 · a976ff081b
commit a976ff081b
parent a95047979a
33 changed files with 1607 additions and 633 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1158,6 +1158,7 @@ llm_graph_result * llama_context::process_ubatch(const llama_ubatch & ubatch, ll
    {
        //const auto t_start_us = ggml_time_us();

+        // FIXME this call causes a crash if any model inputs were not used in the graph and were therefore not allocated
        res->set_inputs(&ubatch);

        //LLAMA_LOG_INFO("graph set inputs time: %.3f ms\n", (ggml_time_us() - t_start_us)/1000.0);