server: /v1/responses (partial) (#18486)

* from previous PR

* Make instruction(system) as first message

* Convert [input_message] (text/image/file)

* Rename convert_responses_to_chatcmpl(body) -> response_body

* Initial tool call support

* Erase instructions field from chatcmpl body

* Feed reasoning texts to chat template

* Use std::vector instead of opaque json array

* Make output_item.added events consistent

* Move `server_task_result_cmpl_partial::update` from header to source

* Match ID of output_item.added and .done events

* Add function_call only if there is no "fc_" prefix

* Add function call output at non-streaming API

* Test if ID is persistent

* Add doc

* Fix style - use trailing comma

* Rewrite state management

* catch up with upstream/master

* Fix style - "type" is the first item of SSE data

* Explicitly check "instructions" from response_body

* Make lambdas static

* Check if reasoning content exists

* Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final

* Reject `input_file` since it is not supported by chatcmpl

* Add "fc_" prefix to non-straming function call id as coderabbit pointed out

---------

Co-authored-by: openingnow <>
This commit is contained in:
손희준 2026-01-22 01:47:23 +09:00 committed by GitHub
parent 33f890e579
commit fbbf3ad190
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 836 additions and 40 deletions

View file

@ -140,6 +140,7 @@ int main(int argc, char ** argv) {
routes.post_completions = models_routes->proxy_post;
routes.post_completions_oai = models_routes->proxy_post;
routes.post_chat_completions = models_routes->proxy_post;
routes.post_responses_oai = models_routes->proxy_post;
routes.post_anthropic_messages = models_routes->proxy_post;
routes.post_anthropic_count_tokens = models_routes->proxy_post;
routes.post_infill = models_routes->proxy_post;
@ -176,6 +177,7 @@ int main(int argc, char ** argv) {
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
ctx_http.post("/infill", ex_wrapper(routes.post_infill));