server: /v1/responses (partial) (#18486)
* from previous PR * Make instruction(system) as first message * Convert [input_message] (text/image/file) * Rename convert_responses_to_chatcmpl(body) -> response_body * Initial tool call support * Erase instructions field from chatcmpl body * Feed reasoning texts to chat template * Use std::vector instead of opaque json array * Make output_item.added events consistent * Move `server_task_result_cmpl_partial::update` from header to source * Match ID of output_item.added and .done events * Add function_call only if there is no "fc_" prefix * Add function call output at non-streaming API * Test if ID is persistent * Add doc * Fix style - use trailing comma * Rewrite state management * catch up with upstream/master * Fix style - "type" is the first item of SSE data * Explicitly check "instructions" from response_body * Make lambdas static * Check if reasoning content exists * Add `oai_resp_id` to task_result_state(also initialized at ctor), server_task_result_cmpl_partial, and server_task_result_cmpl_final * Reject `input_file` since it is not supported by chatcmpl * Add "fc_" prefix to non-straming function call id as coderabbit pointed out --------- Co-authored-by: openingnow <>
This commit is contained in:
parent
33f890e579
commit
fbbf3ad190
11 changed files with 836 additions and 40 deletions
|
|
@ -140,6 +140,7 @@ int main(int argc, char ** argv) {
|
|||
routes.post_completions = models_routes->proxy_post;
|
||||
routes.post_completions_oai = models_routes->proxy_post;
|
||||
routes.post_chat_completions = models_routes->proxy_post;
|
||||
routes.post_responses_oai = models_routes->proxy_post;
|
||||
routes.post_anthropic_messages = models_routes->proxy_post;
|
||||
routes.post_anthropic_count_tokens = models_routes->proxy_post;
|
||||
routes.post_infill = models_routes->proxy_post;
|
||||
|
|
@ -176,6 +177,7 @@ int main(int argc, char ** argv) {
|
|||
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
|
||||
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
|
||||
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
|
||||
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
|
||||
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
|
||||
ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
|
||||
ctx_http.post("/infill", ex_wrapper(routes.post_infill));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue