Autoparser - complete refactoring of parser architecture (#18675)
* Autoparser - full single commit squish * Final pre-merge changes: minor fixes, Kimi 2.5 model parser
This commit is contained in:
parent
34df42f7be
commit
566059a26b
63 changed files with 12967 additions and 10071 deletions
202
scripts/server-test-model.py
Normal file
202
scripts/server-test-model.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
import argparse
|
||||
import json
|
||||
import requests
|
||||
import logging
|
||||
import sys
|
||||
|
||||
# Send every record to stdout exactly as written: the empty terminator stops
# the handler from appending its own newline, so each message decides for
# itself where lines end (needed to echo streamed chunks on one line).
handler = logging.StreamHandler(stream=sys.stdout)
handler.terminator = ""
logging.basicConfig(handlers=[handler], format="%(message)s", level=logging.INFO)
logger = logging.getLogger("server-test-model")
|
||||
|
||||
|
||||
def _merge_tool_call_delta(tool_calls, tc):
    """Fold one streamed tool-call fragment into the accumulated ``tool_calls`` list.

    The fragment's ``"index"`` selects the slot to extend; missing slots are
    created on demand. Fragments without an index are ignored.
    """
    index = tc.get("index")
    if index is None:
        return

    while len(tool_calls) <= index:
        # Using "function" as type default but could be flexible
        tool_calls.append(
            {
                "id": "",
                "type": "function",
                "function": {
                    "name": "",
                    "arguments": "",
                },
            }
        )

    entry = tool_calls[index]
    # A fragment may carry a key with a JSON null value; `or ""` keeps the
    # string concatenation from raising TypeError in that case.
    entry["id"] += tc.get("id") or ""
    function_delta = tc.get("function") or {}
    entry["function"]["name"] += function_delta.get("name") or ""
    entry["function"]["arguments"] += function_delta.get("arguments") or ""


def _read_stream(response):
    """Consume a ``data: ...`` event stream and return (content, reasoning, tool_calls).

    Content and reasoning chunks are echoed through ``logger`` as they arrive
    (reasoning wrapped in the ANSI italic escape). Reading stops at the
    ``[DONE]`` sentinel; lines that are not valid JSON are reported and skipped.
    """
    content_parts = []
    reasoning_parts = []
    tool_calls = []

    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        decoded_line = raw_line.decode("utf-8")
        if not decoded_line.startswith("data: "):
            continue

        data_str = decoded_line[6:]
        if data_str == "[DONE]":
            break

        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            logger.info(f"Failed to decode JSON: {data_str}\n")
            continue

        choices = data.get("choices") if isinstance(data, dict) else None
        if not choices:
            continue
        delta = choices[0].get("delta") or {}

        # Content
        content_chunk = delta.get("content")
        if content_chunk:
            content_parts.append(content_chunk)
            logger.info(content_chunk)

        # Reasoning
        reasoning_chunk = delta.get("reasoning_content")
        if reasoning_chunk:
            reasoning_parts.append(reasoning_chunk)
            logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")

        # Tool calls (tolerates a missing key as well as an explicit null)
        for tc in delta.get("tool_calls") or []:
            _merge_tool_call_delta(tool_calls, tc)

    return "".join(content_parts), "".join(reasoning_parts), tool_calls


def run_query(url, messages, tools=None, stream=False, tool_choice=None):
    """Send one chat-completion request and collect the model's reply.

    Posts ``messages`` (plus ``tools`` / ``tool_choice`` when given) as JSON to
    ``url`` and gathers the answer text, the reasoning text and any tool calls,
    either from a single JSON body or from a ``data: ...`` event stream.
    Progress and errors are reported through the module-level ``logger``.

    Args:
        url: Full URL of the chat-completions endpoint.
        messages: List of chat message dicts, sent as ``"messages"``.
        tools: Optional tool definitions; only sent when truthy.
        stream: When true, request a streamed response and echo chunks as
            they arrive.
        tool_choice: Optional ``"tool_choice"`` value; only sent when truthy.

    Returns:
        A dict with keys ``"content"`` (str), ``"reasoning_content"`` (str)
        and ``"tool_calls"`` (list), or ``None`` when the request failed or a
        non-streaming body was not valid JSON.
    """
    payload = {
        "messages": messages,
        "stream": stream,
        "max_tokens": 5000,
    }
    if tools:
        payload["tools"] = tools
    if tool_choice:
        payload["tool_choice"] = tool_choice

    try:
        response = requests.post(url, json=payload, stream=stream)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        if e.response is not None:
            logger.info(f"Response error: {e} for {e.response.content}\n")
        else:
            logger.info(f"Error connecting to server: {e}\n")
        return None

    full_content = ""
    reasoning_content = ""
    tool_calls = []

    try:
        if stream:
            logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
            full_content, reasoning_content, tool_calls = _read_stream(response)
            logger.info("\n--- End of Stream ---\n")
        else:
            logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
            try:
                data = response.json()
            except ValueError:
                # Covers json.JSONDecodeError and requests' own JSON error.
                logger.info(f"Failed to decode JSON: {response.text}\n")
                return None

            choices = data.get("choices") if isinstance(data, dict) else None
            if choices:
                message = choices[0].get("message") or {}
                # Fields may be present but null (e.g. no text alongside a
                # tool call); normalize so the result has the same shape as
                # the streaming path and "None" is never printed.
                full_content = message.get("content") or ""
                reasoning_content = message.get("reasoning_content") or ""
                tool_calls = message.get("tool_calls") or []
                logger.info(full_content)
            # The log handler adds no newline of its own, so start the marker
            # on a fresh line, as the streaming branch does.
            logger.info("\n--- End of Response ---\n")
    finally:
        # Leaving the stream loop at "[DONE]" does not consume the body, so
        # the connection must be released explicitly.
        response.close()

    return {
        "content": full_content,
        "reasoning_content": reasoning_content,
        "tool_calls": tool_calls,
    }
|
||||
|
||||
|
||||
def test_chat(url, stream):
    """Ask a plain question and report whether any answer text came back.

    Sends a single user message through ``run_query`` and logs a PASS/WARN
    line for the answer text, an INFO line about reasoning text, or a FAIL
    line when the query produced no result at all.

    Args:
        url: Chat-completions endpoint passed on to ``run_query``.
        stream: Whether to exercise the streaming code path.
    """
    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
    question = {"role": "user", "content": "What is the capital of France?"}
    outcome = run_query(url, [question], stream=stream)

    # Nothing usable came back: report and stop.
    if not outcome:
        logger.info("FAIL: No result.\n")
        return

    content_verdict = (
        "PASS: Output received.\n"
        if outcome["content"]
        else "WARN: No content received (valid if strict tool call, but unexpected here).\n"
    )
    logger.info(content_verdict)

    reasoning = outcome.get("reasoning_content")
    if reasoning:
        logger.info(f"INFO: Reasoning content detected ({len(reasoning)} chars).\n")
    else:
        logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
|
||||
|
||||
|
||||
def test_tool_call(url, stream):
    """Ask a question that should trigger the ``get_weather`` tool and report the result.

    Sends one user message together with a single function-tool definition
    (``tool_choice="auto"``) through ``run_query``. Logs PASS plus one line
    per returned tool call, FAIL with the answer text when no tool call came
    back, and FAIL when the query itself produced no result. Reasoning text,
    if any, is reported with its length.

    Args:
        url: Chat-completions endpoint passed on to ``run_query``.
        stream: Whether to exercise the streaming code path.
    """
    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
    messages = [
        {
            "role": "user",
            "content": "What is the weather in London? Please use the get_weather tool.",
        }
    ]
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]

    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)

    if result:
        tcs = result.get("tool_calls")
        if tcs:
            # The log handler appends no newline, so the message must end the
            # line itself; otherwise it runs into the first " Tool: ..." entry.
            logger.info("PASS: Tool calls detected.\n")
            for tc in tcs:
                func = tc.get("function", {})
                logger.info(f" Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
        else:
            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")

        if result.get("reasoning_content"):
            logger.info(
                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
            )
    else:
        logger.info("FAIL: Query failed.\n")
|
||||
|
||||
|
||||
def main():
    """Parse ``--host`` / ``--port`` and run every check against that server.

    Builds the ``/v1/chat/completions`` URL from the command-line options and
    runs the chat check followed by the tool-call check, first without
    streaming and then with streaming.
    """
    cli = argparse.ArgumentParser(description="Test llama-server functionality.")
    cli.add_argument("--host", default="localhost", help="Server host")
    cli.add_argument("--port", default=8080, type=int, help="Server port")
    options = cli.parse_args()

    endpoint = f"http://{options.host}:{options.port}/v1/chat/completions"
    logger.info(f"Testing server at {endpoint}\n")

    # First pass: non-streaming tests; second pass: streaming tests.
    for streaming in (False, True):
        test_chat(endpoint, stream=streaming)
        test_tool_call(endpoint, stream=streaming)


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue