server: prevent data race from HTTP threads (#18263)

* server: prevent data race from HTTP threads

* fix params

* fix default_generation_settings

* nits: make handle_completions_impl looks less strange

* stricter const

* fix GGML_ASSERT(idx < states.size())

* move index to be managed by server_response_reader

* http: make sure req & res lifecycle are tied together

* fix compile

* fix index handling buggy

* fix data race for lora endpoint

* nits: fix shadow variable

* nits: revert redundant changes

* nits: correct naming for json_webui_settings
This commit is contained in:
Xuan-Son Nguyen 2025-12-22 14:23:34 +01:00 committed by GitHub
parent 3997c78e33
commit 6ce863c803
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 459 additions and 366 deletions

View file

@ -177,12 +177,11 @@ bool server_http_context::init(const common_params & params) {
if (!ready) {
auto tmp = string_split<std::string>(req.path, '.');
if (req.path == "/" || tmp.back() == "html") {
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
res.status = 503;
} else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
// allow the models endpoint to be accessed during loading
return true;
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
} else {
// no endpoints is allowed to be accessed when the server is not ready
// this is to prevent any data races or inconsistent states
res.status = 503;
res.set_content(
safe_json_to_str(json {
@ -334,12 +333,16 @@ static std::map<std::string, std::string> get_headers(const httplib::Request & r
return headers;
}
static void process_handler_response(server_http_res_ptr & response, httplib::Response & res) {
// using unique_ptr for request to allow safe capturing in lambdas
using server_http_req_ptr = std::unique_ptr<server_http_req>;
static void process_handler_response(server_http_req_ptr && request, server_http_res_ptr & response, httplib::Response & res) {
if (response->is_stream()) {
res.status = response->status;
set_headers(res, response->headers);
std::string content_type = response->content_type;
// convert to shared_ptr as both chunked_content_provider() and on_complete() need to use it
std::shared_ptr<server_http_req> q_ptr = std::move(request);
std::shared_ptr<server_http_res> r_ptr = std::move(response);
const auto chunked_content_provider = [response = r_ptr](size_t, httplib::DataSink & sink) -> bool {
std::string chunk;
@ -355,8 +358,9 @@ static void process_handler_response(server_http_res_ptr & response, httplib::Re
}
return has_next;
};
const auto on_complete = [response = r_ptr](bool) mutable {
const auto on_complete = [request = q_ptr, response = r_ptr](bool) mutable {
response.reset(); // trigger the destruction of the response object
request.reset(); // trigger the destruction of the request object
};
res.set_chunked_content_provider(content_type, chunked_content_provider, on_complete);
} else {
@ -368,27 +372,29 @@ static void process_handler_response(server_http_res_ptr & response, httplib::Re
void server_http_context::get(const std::string & path, const server_http_context::handler_t & handler) const {
pimpl->srv->Get(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
server_http_res_ptr response = handler(server_http_req{
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
get_params(req),
get_headers(req),
req.path,
req.body,
req.is_connection_closed
});
process_handler_response(response, res);
server_http_res_ptr response = handler(*request);
process_handler_response(std::move(request), response, res);
});
}
void server_http_context::post(const std::string & path, const server_http_context::handler_t & handler) const {
pimpl->srv->Post(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
server_http_res_ptr response = handler(server_http_req{
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
get_params(req),
get_headers(req),
req.path,
req.body,
req.is_connection_closed
});
process_handler_response(response, res);
server_http_res_ptr response = handler(*request);
process_handler_response(std::move(request), response, res);
});
}