common : fix tool call type detection for nullable and enum schemas (#21327)

* common : fix tool call type detection for nullable and enum schemas

* common, tests : fix grammar delegation for nullable/enum schemas and add tests

Fix enum type inference to scan all enum values (not just index 0) so
schemas like {"enum": [0, "celsius"]} correctly detect string type.

Fix schema_delegates in peg-parser to handle nullable type arrays
(["string", "null"]) and typeless enum schemas in raw mode, allowing
the tagged parser to use raw text instead of JSON-formatted strings.

Add test cases for Qwen3-Coder (TAG_WITH_TAGGED format):
- nullable string ["string", "null"]
- nullable string with null first ["null", "string"]
- nullable integer ["integer", "null"]
- enum without explicit type key
This commit is contained in:
Samanvya Tripathi 2026-04-03 11:51:23 -04:00 committed by GitHub
parent 277ff5fff7
commit af5c13841f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 185 additions and 10 deletions

View file

@ -400,12 +400,34 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
for (const auto & [param_name, param_schema] : properties.items()) {
bool is_required = required.find(param_name) != required.end();
std::string type = "object";
auto type_obj = param_schema.contains("type") ? param_schema.at("type") : json::object();
if (type_obj.is_string()) {
type_obj.get_to(type);
} else if (type_obj.is_object()) {
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
type_obj.at("type").get_to(type);
if (param_schema.contains("type")) {
const auto & type_obj = param_schema.at("type");
if (type_obj.is_string()) {
type_obj.get_to(type);
} else if (type_obj.is_array()) {
// Handle nullable types like ["string", "null"]
for (const auto & t : type_obj) {
if (t.is_string() && t.get<std::string>() != "null") {
type = t.get<std::string>();
break;
}
}
} else if (type_obj.is_object()) {
if (type_obj.contains("type") && type_obj.at("type").is_string()) {
type_obj.at("type").get_to(type);
}
}
}
// Infer string type from enum values when type is unspecified
if (type == "object" && param_schema.contains("enum")) {
const auto & enum_vals = param_schema.at("enum");
if (enum_vals.is_array()) {
for (const auto & v : enum_vals) {
if (v.is_string()) {
type = "string";
break;
}
}
}
}
@ -574,9 +596,33 @@ common_peg_parser analyze_tools::build_tool_parser_tag_gemma4_dict(parser_build_
std::vector<arg_entry> arg_entries;
for (const auto & [param_name, param_schema] : properties.items()) {
std::string type = "object";
auto type_v = param_schema.contains("type") ? param_schema.at("type") : json::object();
if (type_v.is_string()) type_v.get_to(type);
std::string type = "object";
if (param_schema.contains("type")) {
const auto & type_v = param_schema.at("type");
if (type_v.is_string()) {
type_v.get_to(type);
} else if (type_v.is_array()) {
// Handle nullable types like ["string", "null"]
for (const auto & t : type_v) {
if (t.is_string() && t.get<std::string>() != "null") {
type = t.get<std::string>();
break;
}
}
}
}
// Infer string type from enum values when type is unspecified
if (type == "object" && param_schema.contains("enum")) {
const auto & enum_vals = param_schema.at("enum");
if (enum_vals.is_array()) {
for (const auto & v : enum_vals) {
if (v.is_string()) {
type = "string";
break;
}
}
}
}
common_peg_parser value_parser = p.eps();
if (type == "string") {

View file

@ -1561,7 +1561,23 @@ void common_peg_arena::build_grammar(const common_grammar_builder & builder, boo
if (!s.schema) {
return true;
}
if (s.raw && s.schema->contains("type") && s.schema->at("type").is_string() && s.schema->at("type") == "string") {
if (s.raw && s.schema->contains("type")) {
const auto & type_val = s.schema->at("type");
if (type_val.is_string() && type_val == "string") {
return true;
}
// Handle nullable types like ["string", "null"] - delegate when the
// non-null type is string, since the tagged format uses raw text
if (type_val.is_array()) {
for (const auto & t : type_val) {
if (t.is_string() && t.get<std::string>() != "null") {
return t.get<std::string>() == "string";
}
}
}
}
// Delegate for enum schemas in raw mode - enum values are literal strings
if (s.raw && !s.schema->contains("type") && s.schema->contains("enum")) {
return true;
}
return false;