server: move msg diffs tracking to HTTP thread (#17740)
* server: move msg diffs tracking to HTTP thread * wip * tool call tests ok * minor : style * cont : fix * move states to server_response_reader * add safe-guard * fix * fix 2 --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
817d743cc1
commit
c4c10bfb86
5 changed files with 167 additions and 94 deletions
|
|
@ -565,6 +565,7 @@ std::vector<unsigned char> completion_token_output::str_to_bytes(const std::stri
|
|||
// server_task_result_cmpl_final
|
||||
//
|
||||
json server_task_result_cmpl_final::to_json() {
|
||||
GGML_ASSERT(is_updated && "update() must be called before to_json()");
|
||||
switch (res_type) {
|
||||
case TASK_RESPONSE_TYPE_NONE:
|
||||
return to_json_non_oaicompat();
|
||||
|
|
@ -582,8 +583,8 @@ json server_task_result_cmpl_final::to_json() {
|
|||
json server_task_result_cmpl_final::to_json_non_oaicompat() {
|
||||
json res = json {
|
||||
{"index", index},
|
||||
{"content", stream ? "" : content}, // in stream mode, content is already in last partial chunk
|
||||
{"tokens", stream ? llama_tokens {} : tokens},
|
||||
{"content", content},
|
||||
{"tokens", tokens},
|
||||
{"id_slot", id_slot},
|
||||
{"stop", true},
|
||||
{"model", oaicompat_model},
|
||||
|
|
@ -619,7 +620,7 @@ json server_task_result_cmpl_final::to_json_oaicompat() {
|
|||
json res = json {
|
||||
{"choices", json::array({
|
||||
json{
|
||||
{"text", stream ? "" : content}, // in stream mode, content is already in last partial chunk
|
||||
{"text", content},
|
||||
{"index", index},
|
||||
{"logprobs", logprobs},
|
||||
{"finish_reason", finish_reason},
|
||||
|
|
@ -700,6 +701,25 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
|
|||
return res;
|
||||
}
|
||||
|
||||
common_chat_msg task_result_state::update_chat_msg(
|
||||
const std::string & text_added,
|
||||
bool is_partial,
|
||||
std::vector<common_chat_msg_diff> & diffs) {
|
||||
generated_text += text_added;
|
||||
auto msg_prv_copy = chat_msg;
|
||||
SRV_DBG("Parsing chat message: %s\n", generated_text.c_str());
|
||||
auto new_msg = common_chat_parse(
|
||||
generated_text,
|
||||
is_partial,
|
||||
oaicompat_chat_syntax);
|
||||
if (!new_msg.empty()) {
|
||||
new_msg.set_tool_call_ids(generated_tool_call_ids, gen_tool_call_id);
|
||||
chat_msg = new_msg;
|
||||
diffs = common_chat_msg_diff::compute_diffs(msg_prv_copy, new_msg.empty() ? msg_prv_copy : new_msg);
|
||||
}
|
||||
return chat_msg;
|
||||
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
||||
std::time_t t = std::time(0);
|
||||
std::string finish_reason = "length";
|
||||
|
|
@ -956,6 +976,7 @@ json server_task_result_cmpl_final::to_json_anthropic_stream() {
|
|||
// server_task_result_cmpl_partial
|
||||
//
|
||||
json server_task_result_cmpl_partial::to_json() {
|
||||
GGML_ASSERT(is_updated && "update() must be called before to_json()");
|
||||
switch (res_type) {
|
||||
case TASK_RESPONSE_TYPE_NONE:
|
||||
return to_json_non_oaicompat();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue