llama: end-to-end tests (#19802)
* tests: add end-to-end tests per model architecture
* fixup for rebase
* fix use-after-free in llama-model-loader.cpp
* fix CI
* fix WebGPU
* fix CI
* disable CI for macOS-latest-cmake-arm64
* use expert_weights_scale only if != 0.0f
* comments
This commit is contained in:
parent
a95047979a
commit
a976ff081b
33 changed files with 1607 additions and 633 deletions
|
|
@ -1158,6 +1158,7 @@ llm_graph_result * llama_context::process_ubatch(const llama_ubatch & ubatch, ll
|
|||
{
|
||||
//const auto t_start_us = ggml_time_us();
|
||||
|
||||
// FIXME this call causes a crash if any model inputs were not used in the graph and were therefore not allocated
|
||||
res->set_inputs(&ubatch);
|
||||
|
||||
//LLAMA_LOG_INFO("graph set inputs time: %.3f ms\n", (ggml_time_us() - t_start_us)/1000.0);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue