server: /v1/responses (partial) (#18486)

* from previous PR

* Make instructions (system) the first message

* Convert [input_message] (text/image/file)

* Rename convert_responses_to_chatcmpl(body) -> response_body

* Initial tool call support

* Erase instructions field from chatcmpl body

* Feed reasoning texts to chat template

* Use std::vector instead of opaque json array

* Make output_item.added events consistent

* Move `server_task_result_cmpl_partial::update` from header to source

* Match ID of output_item.added and .done events

* Add function_call only if there is no "fc_" prefix

* Add function call output to the non-streaming API

* Test if ID is persistent

* Add doc

* Fix style - use trailing comma

* Rewrite state management

* catch up with upstream/master

* Fix style - "type" is the first item of SSE data

* Explicitly check "instructions" from response_body

* Make lambdas static

* Check if reasoning content exists

* Add `oai_resp_id` to task_result_state (also initialized in the ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final

* Reject `input_file` since it is not supported by chatcmpl

* Add "fc_" prefix to non-streaming function call id as coderabbit pointed out

---------

Co-authored-by: openingnow <>
This commit is contained in:
손희준 2026-01-22 01:47:23 +09:00 committed by GitHub
parent 33f890e579
commit fbbf3ad190
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 836 additions and 40 deletions

View file

@ -3073,6 +3073,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
json first_result_json = first_result->to_json();
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
res->data = format_anthropic_sse(first_result_json);
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
res->data = format_oai_resp_sse(first_result_json);
} else {
res->data = format_oai_sse(first_result_json);
}
@ -3107,13 +3109,16 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
// check if there is more data
if (!rd.has_next()) {
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
// Anthropic doesn't send [DONE], message_stop was already sent
output = "";
} else if (res_type != TASK_RESPONSE_TYPE_NONE) {
output = "data: [DONE]\n\n";
} else {
output = "";
switch (res_type) {
case TASK_RESPONSE_TYPE_NONE:
case TASK_RESPONSE_TYPE_OAI_RESP:
case TASK_RESPONSE_TYPE_ANTHROPIC:
output = "";
break;
default:
output = "data: [DONE]\n\n";
break;
}
SRV_DBG("%s", "all results received, terminating stream\n");
return false; // no more data, terminate
@ -3141,6 +3146,8 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
json res_json = result->to_json();
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
output = format_anthropic_sse(res_json);
} else if (res_type == TASK_RESPONSE_TYPE_OAI_RESP) {
output = format_oai_resp_sse(res_json);
} else {
output = format_oai_sse(res_json);
}
@ -3575,6 +3582,22 @@ void server_routes::init_routes() {
TASK_RESPONSE_TYPE_OAI_CHAT);
};
// Handler stored in post_responses_oai: the request body is parsed as JSON,
// converted with convert_responses_to_chatcmpl(), run through
// oaicompat_chat_params_parse(), and then dispatched to
// handle_completions_impl() tagged as TASK_RESPONSE_TYPE_OAI_RESP.
this->post_responses_oai = [this](const server_http_req & req) {
    auto res = create_response();
    std::vector<raw_buffer> attachments;

    // Parse the raw request body and convert it before chat-parameter parsing.
    json chatcmpl_body = convert_responses_to_chatcmpl(json::parse(req.body));

    // The attachments vector is handed to the parser together with the body.
    json task_params = oaicompat_chat_params_parse(chatcmpl_body, meta->chat_params, attachments);

    return handle_completions_impl(req, SERVER_TASK_TYPE_COMPLETION, task_params, attachments, TASK_RESPONSE_TYPE_OAI_RESP);
};
this->post_anthropic_messages = [this](const server_http_req & req) {
auto res = create_response();
std::vector<raw_buffer> files;