common : add standard Hugging Face cache support (#20775)
* common : add standard Hugging Face cache support

  - Use HF API to find all files
  - Migrate all manifests to hugging face cache at startup

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Check with the quant tag

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Cleanup

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Improve error handling and report API errors

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Restore common_cached_model_info and align mmproj filtering

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Prefer main when getting cached ref

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Use cached files when HF API fails

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Use final_path..

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

* Check all inputs

  Signed-off-by: Adrien Gallouët <angt@huggingface.co>

---------

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
parent
e852eb4901
commit
8c7957ca33
8 changed files with 1061 additions and 330 deletions
|
|
@@ -979,37 +979,20 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
|||
for (size_t i = 0; i < params.hf_repo.size(); i++) {
|
||||
common_params_model model;
|
||||
|
||||
// step 1: no `-hff` provided, we auto-detect based on the `-hf` flag
|
||||
if (params.hf_file.empty() || params.hf_file[i].empty()) {
|
||||
auto auto_detected = common_get_hf_file(params.hf_repo[i], params.hf_token, false);
|
||||
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
model.name = params.hf_repo[i];
|
||||
model.hf_repo = auto_detected.repo;
|
||||
model.hf_file = auto_detected.ggufFile;
|
||||
model.hf_repo = params.hf_repo[i];
|
||||
} else {
|
||||
model.hf_repo = params.hf_repo[i];
|
||||
model.hf_file = params.hf_file[i];
|
||||
}
|
||||
|
||||
// step 2: construct the model cache path
|
||||
std::string clean_fname = model.hf_repo + "_" + model.hf_file;
|
||||
string_replace_all(clean_fname, "\\", "_");
|
||||
string_replace_all(clean_fname, "/", "_");
|
||||
model.path = fs_get_cache_file(clean_fname);
|
||||
|
||||
// step 3: download the model if not exists
|
||||
std::string model_endpoint = get_model_endpoint();
|
||||
model.url = model_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
|
||||
|
||||
bool ok = common_download_model(model, params.hf_token, false);
|
||||
if (!ok) {
|
||||
fprintf(stderr, "error: failed to download model from %s\n", model.url.c_str());
|
||||
auto download_result = common_download_model(model, params.hf_token);
|
||||
if (download_result.model_path.empty()) {
|
||||
fprintf(stderr, "error: failed to download model from HuggingFace\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
params.model.push_back(model.path);
|
||||
params.model.push_back(download_result.model_path);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -103,8 +103,8 @@ def test_router_models_max_evicts_lru():
|
|||
|
||||
candidate_models = [
|
||||
"ggml-org/tinygemma3-GGUF:Q8_0",
|
||||
"ggml-org/test-model-stories260K",
|
||||
"ggml-org/test-model-stories260K-infill",
|
||||
"ggml-org/test-model-stories260K:F32",
|
||||
"ggml-org/test-model-stories260K-infill:F32",
|
||||
]
|
||||
|
||||
# Load only the first 2 models to fill the cache
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue