server: prevent data race from HTTP threads (#18263)
* server: prevent data race from HTTP threads * fix params * fix default_generation_settings * nits: make handle_completions_impl look less strange * stricter const * fix GGML_ASSERT(idx < states.size()) * move index to be managed by server_response_reader * http: make sure req & res lifecycle are tied together * fix compile * fix buggy index handling * fix data race for lora endpoint * nits: fix shadowed variable * nits: revert redundant changes * nits: correct naming for json_webui_settings
This commit is contained in:
parent
3997c78e33
commit
6ce863c803
11 changed files with 459 additions and 366 deletions
|
|
@ -325,23 +325,25 @@ void server_response::terminate() {
|
|||
// server_response_reader
|
||||
//
|
||||
|
||||
void server_response_reader::post_task(server_task && task) {
|
||||
void server_response_reader::post_task(server_task && task, bool front) {
|
||||
GGML_ASSERT(id_tasks.empty() && "post_task() can only be called once per reader");
|
||||
task.index = 0;
|
||||
id_tasks.insert(task.id);
|
||||
states.push_back(task.create_state());
|
||||
queue_results.add_waiting_task_id(task.id);
|
||||
queue_tasks.post(std::move(task));
|
||||
queue_tasks.post(std::move(task), front);
|
||||
}
|
||||
|
||||
void server_response_reader::post_tasks(std::vector<server_task> && tasks) {
|
||||
void server_response_reader::post_tasks(std::vector<server_task> && tasks, bool front) {
|
||||
GGML_ASSERT(id_tasks.empty() && "post_tasks() can only be called once per reader");
|
||||
id_tasks = server_task::get_list_id(tasks);
|
||||
states.reserve(tasks.size());
|
||||
for (size_t i = 0; i < tasks.size(); i++) {
|
||||
tasks[i].index = i;
|
||||
states.push_back(tasks[i].create_state());
|
||||
}
|
||||
queue_results.add_waiting_tasks(tasks);
|
||||
queue_tasks.post(std::move(tasks));
|
||||
queue_tasks.post(std::move(tasks), front);
|
||||
}
|
||||
|
||||
bool server_response_reader::has_next() const {
|
||||
|
|
@ -367,7 +369,7 @@ server_task_result_ptr server_response_reader::next(const std::function<bool()>
|
|||
}
|
||||
if (!states.empty()) {
|
||||
// update the generation state if needed
|
||||
size_t idx = result->get_index();
|
||||
const size_t idx = result->index;
|
||||
GGML_ASSERT(idx < states.size());
|
||||
result->update(states[idx]);
|
||||
}
|
||||
|
|
@ -383,6 +385,7 @@ server_task_result_ptr server_response_reader::next(const std::function<bool()>
|
|||
|
||||
server_response_reader::batch_response server_response_reader::wait_for_all(const std::function<bool()> & should_stop) {
|
||||
batch_response batch_res;
|
||||
batch_res.results.clear();
|
||||
batch_res.results.resize(id_tasks.size());
|
||||
while (has_next()) {
|
||||
auto res = next(should_stop);
|
||||
|
|
@ -394,7 +397,7 @@ server_response_reader::batch_response server_response_reader::wait_for_all(cons
|
|||
batch_res.error = std::move(res);
|
||||
return batch_res;
|
||||
}
|
||||
const size_t idx = res->get_index();
|
||||
const size_t idx = res->index;
|
||||
GGML_ASSERT(idx < batch_res.results.size() && "index out of range");
|
||||
GGML_ASSERT(batch_res.results[idx] == nullptr && "duplicate result received");
|
||||
batch_res.results[idx] = std::move(res);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue