server: /v1/responses (partial) (#18486)
* from previous PR * Make instruction(system) as first message * Convert [input_message] (text/image/file) * Rename convert_responses_to_chatcmpl(body) -> response_body * Initial tool call support * Erase instructions field from chatcmpl body * Feed reasoning texts to chat template * Use std::vector instead of opaque json array * Make output_item.added events consistent * Move `server_task_result_cmpl_partial::update` from header to source * Match ID of output_item.added and .done events * Add function_call only if there is no "fc_" prefix * Add function call output at non-streaming API * Test if ID is persistent * Add doc * Fix style - use trailing comma * Rewrite state management * catch up with upstream/master * Fix style - "type" is the first item of SSE data * Explicitly check "instructions" from response_body * Make lambdas static * Check if reasoning content exists * Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final * Reject `input_file` since it is not supported by chatcmpl * Add "fc_" prefix to non-streaming function call id as coderabbit pointed out --------- Co-authored-by: openingnow <>
This commit is contained in:
parent
33f890e579
commit
fbbf3ad190
11 changed files with 836 additions and 40 deletions
|
|
@ -3073,6 +3073,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
|
|||
json first_result_json = first_result->to_json();
|
||||
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
|
||||
res->data = format_anthropic_sse(first_result_json);
|
||||
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
|
||||
res->data = format_oai_resp_sse(first_result_json);
|
||||
} else {
|
||||
res->data = format_oai_sse(first_result_json);
|
||||
}
|
||||
|
|
@ -3107,13 +3109,16 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
|
|||
|
||||
// check if there is more data
|
||||
if (!rd.has_next()) {
|
||||
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
|
||||
// Anthropic doesn't send [DONE], message_stop was already sent
|
||||
output = "";
|
||||
} else if (res_type != TASK_RESPONSE_TYPE_NONE) {
|
||||
output = "data: [DONE]\n\n";
|
||||
} else {
|
||||
output = "";
|
||||
switch (res_type) {
|
||||
case TASK_RESPONSE_TYPE_NONE:
|
||||
case TASK_RESPONSE_TYPE_OAI_RESP:
|
||||
case TASK_RESPONSE_TYPE_ANTHROPIC:
|
||||
output = "";
|
||||
break;
|
||||
|
||||
default:
|
||||
output = "data: [DONE]\n\n";
|
||||
break;
|
||||
}
|
||||
SRV_DBG("%s", "all results received, terminating stream\n");
|
||||
return false; // no more data, terminate
|
||||
|
|
@ -3141,6 +3146,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
|
|||
json res_json = result->to_json();
|
||||
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
|
||||
output = format_anthropic_sse(res_json);
|
||||
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
|
||||
output = format_oai_resp_sse(res_json);
|
||||
} else {
|
||||
output = format_oai_sse(res_json);
|
||||
}
|
||||
|
|
@ -3575,6 +3582,22 @@ void server_routes::init_routes() {
|
|||
TASK_RESPONSE_TYPE_OAI_CHAT);
|
||||
};
|
||||
|
||||
this->post_responses_oai = [this](const server_http_req & req) {
|
||||
auto res = create_response();
|
||||
std::vector<raw_buffer> files;
|
||||
json body = convert_responses_to_chatcmpl(json::parse(req.body));
|
||||
json body_parsed = oaicompat_chat_params_parse(
|
||||
body,
|
||||
meta->chat_params,
|
||||
files);
|
||||
return handle_completions_impl(
|
||||
req,
|
||||
SERVER_TASK_TYPE_COMPLETION,
|
||||
body_parsed,
|
||||
files,
|
||||
TASK_RESPONSE_TYPE_OAI_RESP);
|
||||
};
|
||||
|
||||
this->post_anthropic_messages = [this](const server_http_req & req) {
|
||||
auto res = create_response();
|
||||
std::vector<raw_buffer> files;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue