Autoparser - complete refactoring of parser architecture (#18675)

* Autoparser - full single commit squish * Final pre-merge changes: minor fixes, Kimi 2.5 model parser
2026-03-06 21:01:00 +01:00 · 2026-03-06 21:01:00 +01:00 · 566059a26b
commit 566059a26b
parent 34df42f7be
63 changed files with 12967 additions and 10071 deletions
--- a/scripts/server-test-model.py
+++ b/scripts/server-test-model.py
@ -0,0 +1,202 @@
+import argparse
+import json
+import requests
+import logging
+import sys
+
+handler = logging.StreamHandler(sys.stdout)
+handler.terminator = ""   # ← no newline
+logging.basicConfig(level=logging.INFO, format='%(message)s', handlers=[handler])
+logger = logging.getLogger("server-test-model")
+
+
+def run_query(url, messages, tools=None, stream=False, tool_choice=None):
+    payload = {
+        "messages": messages,
+        "stream": stream,
+        "max_tokens": 5000,
+    }
+    if tools:
+        payload["tools"] = tools
+    if tool_choice:
+        payload["tool_choice"] = tool_choice
+
+    try:
+        response = requests.post(url, json=payload, stream=stream)
+        response.raise_for_status()
+    except requests.exceptions.RequestException as e:
+        if e.response is not None:
+            logger.info(f"Response error: {e} for {e.response.content}\n")
+        else:
+            logger.info(f"Error connecting to server: {e}\n")
+        return None
+
+    full_content = ""
+    reasoning_content = ""
+    tool_calls = []
+
+    if stream:
+        logger.info(f"--- Streaming response (Tools: {bool(tools)}) ---\n")
+        for line in response.iter_lines():
+            if line:
+                decoded_line = line.decode("utf-8")
+                if decoded_line.startswith("data: "):
+                    data_str = decoded_line[6:]
+                    if data_str == "[DONE]":
+                        break
+                    try:
+                        data = json.loads(data_str)
+                        if "choices" in data and len(data["choices"]) > 0:
+                            delta = data["choices"][0].get("delta", {})
+
+                            # Content
+                            content_chunk = delta.get("content", "")
+                            if content_chunk:
+                                full_content += content_chunk
+                                logger.info(content_chunk)
+
+                            # Reasoning
+                            reasoning_chunk = delta.get("reasoning_content", "")
+                            if reasoning_chunk:
+                                reasoning_content += reasoning_chunk
+                                logger.info(f"\x1B[3m{reasoning_chunk}\x1B[0m")
+
+                            # Tool calls
+                            if "tool_calls" in delta:
+                                for tc in delta["tool_calls"]:
+                                    index = tc.get("index")
+                                    if index is not None:
+                                        while len(tool_calls) <= index:
+                                            # Using "function" as type default but could be flexible
+                                            tool_calls.append(
+                                                {
+                                                    "id": "",
+                                                    "type": "function",
+                                                    "function": {
+                                                        "name": "",
+                                                        "arguments": "",
+                                                    },
+                                                }
+                                            )
+
+                                        if "id" in tc:
+                                            tool_calls[index]["id"] += tc["id"]
+                                        if "function" in tc:
+                                            if "name" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "name"
+                                                ] += tc["function"]["name"]
+                                            if "arguments" in tc["function"]:
+                                                tool_calls[index]["function"][
+                                                    "arguments"
+                                                ] += tc["function"]["arguments"]
+
+                    except json.JSONDecodeError:
+                        logger.info(f"Failed to decode JSON: {data_str}\n")
+        logger.info("\n--- End of Stream ---\n")
+    else:
+        logger.info(f"--- Non-streaming response (Tools: {bool(tools)}) ---\n")
+        data = response.json()
+        if "choices" in data and len(data["choices"]) > 0:
+            message = data["choices"][0].get("message", {})
+            full_content = message.get("content", "")
+            reasoning_content = message.get("reasoning_content", "")
+            tool_calls = message.get("tool_calls", [])
+            logger.info(full_content)
+        logger.info("--- End of Response ---\n")
+
+    return {
+        "content": full_content,
+        "reasoning_content": reasoning_content,
+        "tool_calls": tool_calls,
+    }
+
+
+def test_chat(url, stream):
+    logger.info(f"\n=== Testing Chat (Stream={stream}) ===\n")
+    messages = [{"role": "user", "content": "What is the capital of France?"}]
+    result = run_query(url, messages, stream=stream)
+
+    if result:
+        if result["content"]:
+            logger.info("PASS: Output received.\n")
+        else:
+            logger.info("WARN: No content received (valid if strict tool call, but unexpected here).\n")
+
+        if result.get("reasoning_content"):
+            logger.info(f"INFO: Reasoning content detected ({len(result['reasoning_content'])} chars).\n")
+        else:
+            logger.info("INFO: No reasoning content detected (Standard model behavior).\n")
+    else:
+        logger.info("FAIL: No result.\n")
+
+
+def test_tool_call(url, stream):
+    logger.info(f"\n=== Testing Tool Call (Stream={stream}) ===\n")
+    messages = [
+        {
+            "role": "user",
+            "content": "What is the weather in London? Please use the get_weather tool.",
+        }
+    ]
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+    ]
+
+    result = run_query(url, messages, tools=tools, tool_choice="auto", stream=stream)
+
+    if result:
+        tcs = result.get("tool_calls")
+        if tcs and len(tcs) > 0:
+            logger.info("PASS: Tool calls detected.")
+            for tc in tcs:
+                func = tc.get("function", {})
+                logger.info(f"  Tool: {func.get('name')}, Args: {func.get('arguments')}\n")
+        else:
+            logger.info(f"FAIL: No tool calls. Content: {result['content']}\n")
+
+        if result.get("reasoning_content"):
+            logger.info(
+                f"INFO: Reasoning content detected during tool call ({len(result['reasoning_content'])} chars).\n"
+            )
+    else:
+        logger.info("FAIL: Query failed.\n")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Test llama-server functionality.")
+    parser.add_argument("--host", default="localhost", help="Server host")
+    parser.add_argument("--port", default=8080, type=int, help="Server port")
+    args = parser.parse_args()
+
+    base_url = f"http://{args.host}:{args.port}/v1/chat/completions"
+    logger.info(f"Testing server at {base_url}\n")
+
+    # Non-streaming tests
+    test_chat(base_url, stream=False)
+    test_tool_call(base_url, stream=False)
+
+    # Streaming tests
+    test_chat(base_url, stream=True)
+    test_tool_call(base_url, stream=True)
+
+
+if __name__ == "__main__":
+    main()