llama: end-to-end tests (#19802)

* tests: add end-to-end tests per model architecture

* fixup for rebase

* fix use-after-free in llama-model-loader.cpp

* fix CI

* fix WebGPU

* fix CI

* disable CI for macOS-latest-cmake-arm64

* use expert_weights_scale only if != 0.0f

* comments
This commit is contained in:
Johannes Gäßler 2026-03-08 12:30:21 +01:00 committed by GitHub
parent a95047979a
commit a976ff081b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 1607 additions and 633 deletions

View file

@@ -4,6 +4,7 @@
#include <map>
#include <set>
#include <vector>
static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_CLIP, "clip" }, // dummy, only used by llama-quantize
@@ -2786,6 +2787,15 @@ std::string LLM_TN_IMPL::str() const {
return name;
}
std::vector<llm_arch> llm_arch_all() {
std::vector<llm_arch> ret;
ret.reserve(LLM_ARCH_NAMES.size());
for (const auto & [arch, _] : LLM_ARCH_NAMES) {
ret.push_back(arch);
}
return ret;
}
const char * llm_arch_name(llm_arch arch) {
auto it = LLM_ARCH_NAMES.find(arch);
if (it == LLM_ARCH_NAMES.end()) {