server: prevent data race from HTTP threads (#18263)
* server: prevent data race from HTTP threads * fix params * fix default_generation_settings * nits: make handle_completions_impl looks less strange * stricter const * fix GGML_ASSERT(idx < states.size()) * move index to be managed by server_response_reader * http: make sure req & res lifecycle are tied together * fix compile * fix index handling buggy * fix data race for lora endpoint * nits: fix shadow variable * nits: revert redundant changes * nits: correct naming for json_webui_settings
This commit is contained in:
parent
3997c78e33
commit
6ce863c803
11 changed files with 459 additions and 366 deletions
|
|
@ -177,12 +177,11 @@ bool server_http_context::init(const common_params & params) {
|
|||
if (!ready) {
|
||||
auto tmp = string_split<std::string>(req.path, '.');
|
||||
if (req.path == "/" || tmp.back() == "html") {
|
||||
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
|
||||
res.status = 503;
|
||||
} else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
|
||||
// allow the models endpoint to be accessed during loading
|
||||
return true;
|
||||
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
|
||||
} else {
|
||||
// no endpoints is allowed to be accessed when the server is not ready
|
||||
// this is to prevent any data races or inconsistent states
|
||||
res.status = 503;
|
||||
res.set_content(
|
||||
safe_json_to_str(json {
|
||||
|
|
@ -334,12 +333,16 @@ static std::map<std::string, std::string> get_headers(const httplib::Request & r
|
|||
return headers;
|
||||
}
|
||||
|
||||
static void process_handler_response(server_http_res_ptr & response, httplib::Response & res) {
|
||||
// using unique_ptr for request to allow safe capturing in lambdas
|
||||
using server_http_req_ptr = std::unique_ptr<server_http_req>;
|
||||
|
||||
static void process_handler_response(server_http_req_ptr && request, server_http_res_ptr & response, httplib::Response & res) {
|
||||
if (response->is_stream()) {
|
||||
res.status = response->status;
|
||||
set_headers(res, response->headers);
|
||||
std::string content_type = response->content_type;
|
||||
// convert to shared_ptr as both chunked_content_provider() and on_complete() need to use it
|
||||
std::shared_ptr<server_http_req> q_ptr = std::move(request);
|
||||
std::shared_ptr<server_http_res> r_ptr = std::move(response);
|
||||
const auto chunked_content_provider = [response = r_ptr](size_t, httplib::DataSink & sink) -> bool {
|
||||
std::string chunk;
|
||||
|
|
@ -355,8 +358,9 @@ static void process_handler_response(server_http_res_ptr & response, httplib::Re
|
|||
}
|
||||
return has_next;
|
||||
};
|
||||
const auto on_complete = [response = r_ptr](bool) mutable {
|
||||
const auto on_complete = [request = q_ptr, response = r_ptr](bool) mutable {
|
||||
response.reset(); // trigger the destruction of the response object
|
||||
request.reset(); // trigger the destruction of the request object
|
||||
};
|
||||
res.set_chunked_content_provider(content_type, chunked_content_provider, on_complete);
|
||||
} else {
|
||||
|
|
@ -368,27 +372,29 @@ static void process_handler_response(server_http_res_ptr & response, httplib::Re
|
|||
|
||||
void server_http_context::get(const std::string & path, const server_http_context::handler_t & handler) const {
|
||||
pimpl->srv->Get(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
|
||||
server_http_res_ptr response = handler(server_http_req{
|
||||
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
|
||||
get_params(req),
|
||||
get_headers(req),
|
||||
req.path,
|
||||
req.body,
|
||||
req.is_connection_closed
|
||||
});
|
||||
process_handler_response(response, res);
|
||||
server_http_res_ptr response = handler(*request);
|
||||
process_handler_response(std::move(request), response, res);
|
||||
});
|
||||
}
|
||||
|
||||
void server_http_context::post(const std::string & path, const server_http_context::handler_t & handler) const {
|
||||
pimpl->srv->Post(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
|
||||
server_http_res_ptr response = handler(server_http_req{
|
||||
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
|
||||
get_params(req),
|
||||
get_headers(req),
|
||||
req.path,
|
||||
req.body,
|
||||
req.is_connection_closed
|
||||
});
|
||||
process_handler_response(response, res);
|
||||
server_http_res_ptr response = handler(*request);
|
||||
process_handler_response(std::move(request), response, res);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue