common : fix Step-3.5-Flash format detection and thinking support (#19635)
* common : fix Step-3.5-Flash format detection and thinking support

Step-3.5-Flash uses the same XML-style tool call format as Qwen3-Coder (<tool_call><function=...><parameter=...>) but its Jinja template lacks the bare <function> and plural <parameters> markers that the detection logic previously required. This caused it to fall through to Hermes 2 Pro, which doesn't call func_args_not_string(), so arguments stayed as JSON strings and templates using arguments|items crashed.

Additionally, the Qwen3-Coder-XML format handler had no thinking support. Models like Step-3.5-Flash that unconditionally emit <think> in their generation prompt need the same thinking_forced_open handling that Nemotron v3 and Hermes 2 Pro already have, otherwise reasoning_content is never separated from content in API responses.

Changes:
- Relax Qwen3-Coder XML detection to only require the 3 shared markers
- Tighten Nemotron v3 branch to also require bare <function> and plural <parameters>, preventing Step-3.5-Flash from being misrouted via <think>
- Add thinking_forced_open support to Qwen3-Coder-XML init function
- Add <think>/</think> to preserved tokens
- Fix build_grammar_xml_tool_call to handle thinking_forced_open in the grammar root rule, allowing </think> before tool calls
- Add Step-3.5-Flash chat template and format detection test

Builds on: https://github.com/ggml-org/llama.cpp/pull/19283

* chat : route Step-3.5-Flash to Nemotron v3 PEG parser, add tests

Step-3.5-Flash uses the same XML tool call format as Qwen3-Coder and Nemotron 3 Nano (<tool_call>/<function=...>/<parameter=...>) but with unconditional <think> output. Route it to the Nemotron v3 PEG parser for streaming and schema-aware parameter parsing.

Detection: templates with <think> + XML tool tags use Nemotron v3 PEG parser; templates without <think> (Qwen3-Coder) use GBNF grammar.

Tests cover: basic messages, tool calls with/without thinking content, parallel tool calls, code string parameters, optional </parameter> closing tags, and JSON schema response format.

* chat : remove dead thinking code from qwen3_coder_xml

Remove thinking handling code that became unreachable after routing Step-3.5-Flash to the Nemotron v3 PEG parser. Qwen3-Coder has no <think> in its template, so the thinking_forced_open logic, preserved tokens, and grammar prefix were dead paths.
This commit is contained in:
parent
39e4b1dc9b
commit
3dadc88b58
3 changed files with 297 additions and 5 deletions
|
|
@ -3553,6 +3553,28 @@ Hey there!<|im_end|>
|
|||
auto grammar = build_grammar(params.grammar);
|
||||
GGML_ASSERT(grammar && "Failed to build Qwen3-Coder grammar with union types");
|
||||
}
|
||||
|
||||
{
|
||||
// Step-3.5-Flash template: uses same XML output format as Qwen3-Coder and Nemotron v3,
|
||||
// but with <think> support. Routes to the Nemotron v3 PEG parser for streaming and
|
||||
// schema-aware parameter parsing.
|
||||
auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
|
||||
assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
|
||||
|
||||
// Grammar and PEG parser should be generated with thinking_forced_open
|
||||
{
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.messages = { message_user };
|
||||
inputs.tools = { special_function_tool };
|
||||
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
assert_equals(COMMON_CHAT_FORMAT_PEG_CONSTRUCTED, params.format);
|
||||
assert_equals(true, params.thinking_forced_open);
|
||||
assert_equals(false, params.grammar.empty());
|
||||
assert_equals(false, params.parser.empty());
|
||||
auto grammar = build_grammar(params.grammar);
|
||||
GGML_ASSERT(grammar && "Failed to build Step-3.5-Flash grammar");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void test_template_output_peg_parsers() {
|
||||
|
|
@ -3799,6 +3821,196 @@ static void test_template_output_peg_parsers() {
|
|||
});
|
||||
}
|
||||
|
||||
{
|
||||
// Step-3.5-Flash (uses Nemotron v3 PEG parser with thinking_forced_open)
|
||||
// Unlike Nemotron, Step-3.5-Flash always emits <think> regardless of enable_thinking,
|
||||
// so all inputs must include a </think> delimiter.
|
||||
auto tmpls = read_templates("models/templates/stepfun-ai-Step-3.5-Flash.jinja");
|
||||
|
||||
// Test basic message with reasoning
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "I'm\nthinking\n</think>\nHello, world!\nWhat's up?";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
|
||||
t.expect = message_assist_thoughts;
|
||||
});
|
||||
|
||||
// Test basic message without thinking content
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input = "</think>\nHello, world!\nWhat's up?";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
|
||||
t.expect = message_assist;
|
||||
});
|
||||
|
||||
// Test tool call without thinking content
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {special_function_tool};
|
||||
|
||||
t.expect = message_assist_call;
|
||||
});
|
||||
|
||||
// Test tool call with thinking
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"I'm\nthinking\n</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {special_function_tool};
|
||||
|
||||
t.expect = message_assist_call_thoughts;
|
||||
});
|
||||
|
||||
// Test parallel tool calls with thinking
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"I'm\nthinking\n</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function_with_opt>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=arg2>\n"
|
||||
"2\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.parallel_tool_calls = true;
|
||||
t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
|
||||
|
||||
t.expect.reasoning_content = "I'm\nthinking";
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "special_function",
|
||||
/* .arguments = */ R"({"arg1": 1})",
|
||||
/* .id = */ {},
|
||||
}, {
|
||||
/* .name = */ "special_function_with_opt",
|
||||
/* .arguments = */ R"({"arg1": 1, "arg2": 2})",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test parallel tool calls without thinking content
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=special_function_with_opt>\n"
|
||||
"<parameter=arg1>\n"
|
||||
"1\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=arg2>\n"
|
||||
"2\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.parallel_tool_calls = true;
|
||||
t.params.tools = {special_function_tool, special_function_tool_with_optional_param};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "special_function",
|
||||
/* .arguments = */ R"({"arg1": 1})",
|
||||
/* .id = */ {},
|
||||
}, {
|
||||
/* .name = */ "special_function_with_opt",
|
||||
/* .arguments = */ R"({"arg1": 1, "arg2": 2})",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test tool call with code string parameter
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
"def hello():\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"\n"
|
||||
"hello()\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {python_tool};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "python",
|
||||
/* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test tool call with string parameter and no closing </parameter> tag
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"</think>\n"
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
"def hello():\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"\n"
|
||||
"hello()\n"
|
||||
"</function>\n"
|
||||
"</tool_call>";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.tools = {python_tool};
|
||||
|
||||
t.expect.tool_calls = {{
|
||||
/* .name = */ "python",
|
||||
/* .arguments = */ "{\"code\": \"def hello():\\n print(\\\"Hello, world!\\\")\\n\\nhello()\"}",
|
||||
/* .id = */ {},
|
||||
}};
|
||||
});
|
||||
|
||||
// Test response format (JSON schema with thinking)
|
||||
test_peg_parser(tmpls.get(), [&](auto & t) {
|
||||
t.input =
|
||||
"I need to output the invoice details in JSON\n"
|
||||
"</think>\n"
|
||||
R"({"amount": 123.45, "date": "2025-12-03"})";
|
||||
t.params.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
|
||||
t.params.json_schema = invoice_schema;
|
||||
|
||||
t.expect.reasoning_content = "I need to output the invoice details in JSON";
|
||||
t.expect.content = R"({"amount": 123.45, "date": "2025-12-03"})";
|
||||
});
|
||||
}
|
||||
|
||||
{
|
||||
// Solar-Open-100B
|
||||
auto tmpls = read_templates("models/templates/upstage-Solar-Open-100B.jinja");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue