common : rework gpt-oss parser (#20393)

* common : rework gpt-oss parser
* cont : fix gpt-oss tests
* cont : add structured output test
* cont : rename final to final_msg
2026-03-18 04:41:25 -05:00 · 2026-03-18 04:41:25 -05:00 · 5e8910a0db
commit 5e8910a0db
parent fe00a84b4b
2 changed files with 56 additions and 113 deletions
--- a/common/chat.cpp
+++ b/common/chat.cpp
@ -933,17 +933,12 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
    // Copy reasoning to the "thinking" field as expected by the gpt-oss template
    auto adjusted_messages = json::array();
-    for (const auto & msg : inputs.messages) {
+    for (auto msg : inputs.messages) {
-        auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
-        auto has_tool_calls        = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
+            msg["thinking"] = msg.at("reasoning_content");
-
+            msg.erase("content");
        if (has_reasoning_content && has_tool_calls) {
            auto adjusted_message        = msg;
            adjusted_message["thinking"] = msg.at("reasoning_content");
            adjusted_messages.push_back(adjusted_message);
        } else {
            adjusted_messages.push_back(msg);
        }
        adjusted_messages.push_back(msg);
    }
    auto prompt = common_chat_template_direct_apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
@ -969,45 +964,31 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
        "<|channel|>", "<|constrain|>", "<|message|>", "<|start|>", "<|end|>",
    };
-    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
+    auto has_tools           = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    auto has_response_format = !inputs.json_schema.is_null() && inputs.json_schema.is_object();
-    auto include_grammar   = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && has_tools;
+    auto include_grammar     = has_response_format || (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE);
    auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
-        const std::string END                = "<|end|>";
+        auto start           = p.rule("start", p.literal("<|start|>assistant"));
-        const std::string START              = "<|start|>";
+        auto end             = p.rule("end", p.literal("<|end|>"));
-        const std::string MESSAGE            = "<|message|>";
+        auto content         = p.rule("message-content", p.until("<|end|>"));
-        const std::string CHANNEL            = "<|channel|>";
+        auto channel         = p.literal("<|channel|>") + (p.literal("commentary") | p.literal("analysis"));
-        const std::string CONSTRAIN          = "<|constrain|>";
+        auto constrain_type  = p.chars("[A-Za-z0-9_-]", 1, -1);
        const std::string START_ASSISTANT    = START + "assistant";
        const std::string CHANNEL_ANALYSIS   = CHANNEL + "analysis";
        const std::string CHANNEL_COMMENTARY = CHANNEL + "commentary";
        const std::string CHANNEL_FINAL      = CHANNEL + "final";
-        auto the_end = END | p.end();
+        auto analysis = p.rule("analysis", p.literal("<|channel|>analysis<|message|>") + p.reasoning(content) + end);
        auto preamble = p.rule("preamble", p.literal("<|channel|>commentary<|message|>") + p.content(content) + end);
        auto final_msg = p.rule("final", p.literal("<|channel|>final<|message|>") + p.content(content));
        auto any = p.rule("any", preamble | analysis);
-        const std::string analysis_header  = CHANNEL_ANALYSIS + MESSAGE;
+        if (has_response_format) {
-        auto              segment_content  = p.until(END);
+            auto constraint = p.optional(p.space() + p.literal("<|constrain|>") + constrain_type);
-        auto              analysis_segment = extract_reasoning ?
+            auto response_format = p.rule("response-format",
-                                                 p.literal(analysis_header) + p.reasoning(segment_content) + p.until(END) + the_end :
+                p.literal("<|channel|>final") + constraint + p.literal("<|message|>") +
-                                                 p.content(analysis_header + p.until(END) + the_end);
+                p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
-        auto channel_header_content = p.until_one_of({ " to=functions.", MESSAGE });
+            return response_format | (analysis + p.zero_or_more(start + analysis) + start + response_format);
        auto content_header         = p.choice({ p.literal(CHANNEL_COMMENTARY), p.literal(CHANNEL_FINAL) });
        auto content_segment        = p.rule("content-segment", content_header + channel_header_content + MESSAGE +
                                                                    p.content(segment_content) + the_end);
        if (!inputs.json_schema.is_null()) {
            auto final_header = p.literal(CHANNEL_FINAL);
            auto constraint   = p.optional(p.space() + p.literal(CONSTRAIN) + channel_header_content);
            return p.optional(analysis_segment) + final_header + constraint + MESSAGE +
                   p.content(p.schema(p.json(), "response-format", inputs.json_schema));
        }
        auto segment  = p.optional(START_ASSISTANT + p.space()) + p.choice({ content_segment, analysis_segment });
        auto contents = p.optional(segment + p.repeat(p.optional(p.space()) + segment, 0, -1)) + p.end();
        // Tool call parser
        if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
            auto tool_choice = p.choice();
@ -1016,42 +997,37 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
                std::string  name     = function.at("name");
                const auto & params   = function.at("parameters");
-                // Tool call can appear as:
+                auto func_name  = p.literal(" to=functions.") + p.tool_name(p.literal(name));
-                // 1. In role header: " to=functions.NAME<|channel|>..."
+                auto constraint = p.optional(p.space() + p.literal("<|constrain|>") + constrain_type);
                // 2. In channel: "<|channel|>(analysis|commentary) to=functions.NAME..."
                auto func_name = p.literal(" to=functions.") + p.tool_name(p.literal(name));
                auto channel    = p.literal(CHANNEL_COMMENTARY) | p.literal(CHANNEL_ANALYSIS);
                auto constraint = p.space() + p.optional(p.literal(CONSTRAIN) + channel_header_content);
                auto args       = p.tool_args(p.schema(p.json(), "tool-" + name + "-schema", params));
-                // Pattern 1: recipient in role header
+                // recipient in role header
-                // " to=functions.NAME<|channel|>(analysis|commentary)[constraint]<|message|>ARGS"
+                //   <|start|>assistant to=functions.NAME<|channel|>(commentary|analysis)[constraint]<|message|>ARGS
-                auto tool_in_role = p.tool(p.tool_open(func_name + channel) + constraint + MESSAGE + args);
+                auto tool_in_role = p.tool(p.tool_open(func_name + channel + constraint + p.literal("<|message|>")) + args);
-                // Pattern 2: recipient in channel header
+                // recipient in channel header
-                // "<|channel|>(analysis|commentary) to=functions.NAME[constraint]<|message|>ARGS"
+                //   <|channel|>(commentary|analysis) to=functions.NAME[constraint]<|message|>ARGS
-                auto tool_in_channel = p.tool(channel + p.tool_open(func_name + constraint + MESSAGE) + args);
+                auto tool_in_channel = p.tool(p.tool_open(channel + func_name + constraint + p.literal("<|message|>")) + args);
-                tool_choice |= tool_in_role | tool_in_channel;
+                tool_choice |= p.rule("tool-" + name, tool_in_role | tool_in_channel);
            });
-            auto min_calls = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED ? 1 : 0;
+            auto tool_call  = p.trigger_rule("tool-call", tool_choice);
            auto max_calls = inputs.parallel_tool_calls ? -1 : 1;
-            auto role_start = p.optional(p.space() + p.literal(START_ASSISTANT));
+            if (inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED) {
-            auto tool_call  = p.rule("tool-call", p.repeat(role_start + tool_choice, min_calls, max_calls) + p.end());
+                return tool_call | ( any + p.zero_or_more(start + any) + start + tool_call);
            }
-            return p.choice({ p.trigger_rule("single-tool", tool_call), p.trigger_rule("tools", p.one_or_more(segment) + tool_call) });
+            return tool_call | final_msg | (any + p.zero_or_more(start + any) + start + (tool_call | final_msg));
        }
-        return contents;
+        return final_msg | (any + p.zero_or_more(start + any) + start + final_msg);
    });
    data.parser = parser.save();
    if (include_grammar) {
-        data.grammar_lazy = has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
+        data.grammar_lazy = !(has_response_format || (has_tools && inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED));
        data.grammar      = build_grammar([&](const common_grammar_builder & builder) {
            foreach_function(inputs.tools, [&](const json & tool) {
                const auto & function = tool.at("function");
@ -1062,10 +1038,9 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
        });
        data.grammar_triggers = {
-            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)"               },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "^\\s+to$" },
-            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "(?:<\\|end\\|>)(?:<\\|start\\|>assistant\\s*)?(\\s+to=functions)" },
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "<\\|start\\|>assistant(\\s+to)" },
-            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
+            { COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN, "<\\|start\\|>assistant(<\\|channel\\|>(?:commentary|analysis)\\s+to)" }
             "(?:<\\|start\\|>assistant\\s*)?(<\\|channel\\|>(?:commentary|analysis)\\s+to=functions)"                }
        };
    }
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@ -2448,7 +2448,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
        // Analysis channel (reasoning) with final channel (content)
        tst.test(
-               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's "
               "up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .expect(message_assist_thoughts)
@ -2461,15 +2461,6 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .expect_reasoning("I'm\nthinking")
            .run();
        // Reasoning format none - reasoning stays in content
        tst.test(
               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n<|channel|>final<|message|>Hello, world!\nWhat's "
               "up?")
            .reasoning_format(COMMON_REASONING_FORMAT_NONE)
            .expect_content(
                "<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?")
            .run();
        // Tool call with recipient in role header: " to=functions.NAME<|channel|>analysis<|message|>JSON"
        tst.test(" to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
            .tools({ special_function_tool })
@ -2496,37 +2487,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
        // Tool call with reasoning + content (analysis first, then tool call)
        tst.test(
-               "<|channel|>analysis<|message|>I'm\nthinking<|end|>\n"
+               "<|channel|>analysis<|message|>I'm\nthinking<|end|>"
               "<|start|>assistant to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();
-        // Tool calling with extra channel before
+        // Complex tool calling
        tst.test(
-                "<|channel|>analysis<|message|>I'm\nthinking<|end|><|start|>assistant<|channel|>commentary"
+            "<|channel|>analysis<|message|>Thinking about edit...<|end|>"
                " to=functions.special_function <|message|>{\"arg1\": 1}")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();
        // Reasoning after final channel
        // Tool calling after final channel
        tst.test(
            "<|channel|>final<|message|><|end|>"
            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit..."
        )
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .expect_reasoning("Thinking about edit...")
            .expect_content("")
            .run();
        // Tool calling after final channel
        tst.test(
            "<|channel|>final<|message|><|end|>"
            "<|start|>assistant<|channel|>analysis<|message|>Thinking about edit...<|end|>"
            "<|start|>assistant<|channel|>commentary to=functions.edit <|constrain|>json"
            "<|message|>{\"oldString\": \"if (part < railCount - 1) {\", \"newString\": \"if (part < 4) {\", \"replaceAll\": false}"
            )
@ -2561,19 +2531,17 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            })
            .run();
-        // Parallel tool calls
+        // Structured output
        tst.test(
-               " to=functions.special_function<|channel|>analysis<|message|>{\"arg1\": 1}\n"
+            "<|channel|>analysis<|message|>I need to output the invoice details in JSON<|end|>"
-               "<|start|>assistant to=functions.special_function_with_opt<|channel|>analysis<|message|>{\"arg1\": 1, "
+            "<|start|>assistant<|channel|>final <|constrain|>json"
-               "\"arg2\": 2}")
+            "<|message|>"
-            .parallel_tool_calls(true)
+            R"({"amount": 123.45, "date": "2025-12-03"})"
-            .tools({
+            )
-                special_function_tool, special_function_tool_with_optional_param
+            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-        })
+            .json_schema(invoice_schema)
-            .expect_tool_calls({
+            .expect_reasoning("I need to output the invoice details in JSON")
-                { "special_function", R"({"arg1": 1})", {} },
+            .expect_content(R"({"amount": 123.45, "date": "2025-12-03"})")
                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
            })
            .run();
    }