server: Add cached_tokens info to oaicompat responses (#19361)
* tests : fix fetch_server_test_models.py * server: to_json_oaicompat cached_tokens — adds OpenAI- and Anthropic-compatible information about the number of cached prompt tokens used in a response.
This commit is contained in:
parent
76f2dc70c3
commit
26c9ce1288
6 changed files with 61 additions and 31 deletions
|
|
@ -755,6 +755,15 @@ json server_task_result_cmpl_final::to_json_non_oaicompat() {
|
|||
return response_fields.empty() ? res : json_get_nested_values(response_fields, res);
|
||||
}
|
||||
|
||||
json server_task_result_cmpl_final::usage_json_oaicompat() {
    // Build the OpenAI-compatible "usage" object for a finished completion:
    // generated-token and prompt-token counts, plus a details sub-object
    // reporting how many prompt tokens were served from the prompt cache.
    json prompt_details = json {
        {"cached_tokens", n_prompt_tokens_cache},
    };

    json usage;
    usage["completion_tokens"]     = n_decoded;
    usage["prompt_tokens"]         = n_prompt_tokens;
    usage["total_tokens"]          = n_decoded + n_prompt_tokens;
    usage["prompt_tokens_details"] = prompt_details;
    return usage;
}
|
||||
|
||||
json server_task_result_cmpl_final::to_json_oaicompat() {
|
||||
std::time_t t = std::time(0);
|
||||
json logprobs = json(nullptr); // OAI default to null
|
||||
|
|
@ -780,11 +789,7 @@ json server_task_result_cmpl_final::to_json_oaicompat() {
|
|||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "text_completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens}
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
{"id", oaicompat_cmpl_id}
|
||||
};
|
||||
|
||||
|
|
@ -832,11 +837,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
|
|||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens}
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
{"id", oaicompat_cmpl_id}
|
||||
};
|
||||
|
||||
|
|
@ -901,11 +902,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
|
|||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion.chunk"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
}},
|
||||
{"usage", usage_json_oaicompat()},
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -984,6 +981,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp() {
|
|||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}},
|
||||
};
|
||||
|
||||
|
|
@ -1092,7 +1090,8 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
|||
{"usage", json {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"output_tokens", n_decoded},
|
||||
{"total_tokens", n_decoded + n_prompt_tokens}
|
||||
{"total_tokens", n_decoded + n_prompt_tokens},
|
||||
{"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
|
||||
}}
|
||||
}},
|
||||
}}
|
||||
|
|
@ -1158,7 +1157,8 @@ json server_task_result_cmpl_final::to_json_anthropic() {
|
|||
{"stop_reason", stop_reason},
|
||||
{"stop_sequence", stopping_word.empty() ? nullptr : json(stopping_word)},
|
||||
{"usage", {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"cache_read_input_tokens", n_prompt_tokens_cache},
|
||||
{"input_tokens", n_prompt_tokens - n_prompt_tokens_cache},
|
||||
{"output_tokens", n_decoded}
|
||||
}}
|
||||
};
|
||||
|
|
@ -1668,7 +1668,8 @@ json server_task_result_cmpl_partial::to_json_anthropic() {
|
|||
{"stop_reason", nullptr},
|
||||
{"stop_sequence", nullptr},
|
||||
{"usage", {
|
||||
{"input_tokens", n_prompt_tokens},
|
||||
{"cache_read_input_tokens", n_prompt_tokens_cache},
|
||||
{"input_tokens", n_prompt_tokens - n_prompt_tokens_cache},
|
||||
{"output_tokens", 0}
|
||||
}}
|
||||
}}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue