llama: fix llama-model-saver (#20503)

* llama : add fd-based model loading via llama_model_load_from_fd

* llama : address review feedback for fd-based model loading

* llama : use FILE pointer instead of fd in public API

* llama : use FILE pointer consistently, address review feedback

* fixup

* fix tensor names

* fix llama-model-saver

* roundtrip tests

* fixup

* refactor tests

* fix prints

* fix model saving

* fix CI, disable Chameleon

* print seed

---------

Co-authored-by: Siddhesh2377 <siddheshsonar2377@gmail.com>
Commit authored by Johannes Gäßler on 2026-03-25 11:53:16 +01:00, committed via GitHub.
parent 69e0ecef06
commit 36dafba5c4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 338 additions and 99 deletions

View file

@ -77,6 +77,7 @@ extern "C" {
};
GGML_API struct gguf_context * gguf_init_empty(void);
GGML_API struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params);
GGML_API struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params);
//GGML_API struct gguf_context * gguf_init_from_buffer(..);
@ -189,6 +190,7 @@ extern "C" {
//
// write the entire context to a binary file
GGML_API bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta);
GGML_API bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta);
// get the size in bytes of the meta data (header, kv pairs, tensor info) including padding

View file

@ -773,6 +773,5 @@ inline bool ggml_check_edges(const struct ggml_cgraph * cgraph,
// expose GGUF internals for test code
GGML_API size_t gguf_type_size(enum gguf_type type);
GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params);
GGML_API void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & buf, bool only_meta);
#endif // __cplusplus

View file

@ -394,7 +394,11 @@ bool gguf_read_emplace_helper(const struct gguf_reader & gr, std::vector<struct
return true;
}
struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params) {
struct gguf_context * gguf_init_from_file_ptr(FILE * file, struct gguf_init_params params) {
if (!file) {
return nullptr;
}
const struct gguf_reader gr(file);
struct gguf_context * ctx = new gguf_context;
@ -848,7 +852,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
return nullptr;
}
struct gguf_context * result = gguf_init_from_file_impl(file, params);
struct gguf_context * result = gguf_init_from_file_ptr(file, params);
fclose(file);
return result;
}
@ -1508,6 +1512,19 @@ void gguf_write_to_buf(const struct gguf_context * ctx, std::vector<int8_t> & bu
gguf_write_out(ctx, gw, only_meta);
}
// Serialize the GGUF context to an already-open FILE stream.
// Unlike gguf_write_to_file(), the caller retains ownership of `file` and is
// responsible for closing it; this function never closes the stream.
// only_meta: when true, presumably writes only the metadata portion (header,
// kv pairs, tensor info) without tensor data — confirm against gguf_write_out.
// Returns true on success, false if serialization fails.
bool gguf_write_to_file_ptr(const struct gguf_context * ctx, FILE * file, bool only_meta) {
GGML_ASSERT(file);
try {
gguf_writer_file gw(file);
gguf_write_out(ctx, gw, only_meta);
} catch (const std::runtime_error& ex) {
// gguf_writer_file reports I/O failures via std::runtime_error;
// convert the exception into a bool result for this C-linkage API.
GGML_LOG_ERROR("%s: failed to write GGUF data: %s\n", __func__, ex.what());
return false;
}
return true;
}
bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, bool only_meta) {
FILE * file = ggml_fopen(fname, "wb");
@ -1516,17 +1533,13 @@ bool gguf_write_to_file(const struct gguf_context * ctx, const char * fname, boo
return false;
}
try {
gguf_writer_file gw(file);
gguf_write_out(ctx, gw, only_meta);
} catch (const std::runtime_error& ex) {
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s': %s\n", __func__, fname, ex.what());
fclose(file);
return false;
const bool success = gguf_write_to_file_ptr(ctx, file, only_meta);
if (!success) {
GGML_LOG_ERROR("%s: failed to write GGUF data into '%s'\n", __func__, fname);
}
fclose(file);
return true;
return success;
}
size_t gguf_get_meta_size(const struct gguf_context * ctx) {