server: allow router to report child instances' sleep status (#20849)

* server: allow router to report child instances' sleep status

* refactor

* move sleeping to state

* nits
This commit is contained in:
Xuan-Son Nguyen 2026-03-22 18:33:52 +01:00 committed by GitHub
parent bd3f1d9d65
commit 49bfddeca1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 102 additions and 43 deletions

View file

@@ -259,6 +259,12 @@ int main(int argc, char ** argv) {
     // load the model
     LOG_INF("%s: loading model\n", __func__);
+    if (server_models::is_child_server()) {
+        ctx_server.on_sleeping_changed([&](bool sleeping) {
+            server_models::notify_router_sleeping_state(sleeping);
+        });
+    }
     if (!ctx_server.load_model(params)) {
         clean_up();
         if (ctx_http.thread.joinable()) {
@@ -309,9 +315,8 @@ int main(int argc, char ** argv) {
     LOG_INF("%s: starting the main loop...\n", __func__);
     // optionally, notify router server that this instance is ready
-    const char * router_port = std::getenv("LLAMA_SERVER_ROUTER_PORT");
     std::thread monitor_thread;
-    if (router_port != nullptr) {
+    if (server_models::is_child_server()) {
         monitor_thread = server_models::setup_child_server(shutdown_handler);
     }