* grammar: add test case for nullable symbol loop
Reproduce stack overflow (or OOM) with ( [x]* )* found while adding
GBNF support to ripgrep-edit.
llama-server reproducer:
curl \
-X POST \
-d '{
"messages": [{ "role": "user", "content": "write yes" }],
"grammar": "root ::= ( [x]* )*"
}' \
-H "Content-Type: application/json" \
http://localhost:8811/v1/chat/completions
* grammar: prevent stack overflow with nullable symbol loop
Fix a potential stack overflow in llama_grammar_advance_stack that
could occur when processing grammars with nullable symbols that lead
to infinite derivations of empty strings. The fix introduces cycle
detection by tracking visited stacks to prevent infinite recursion.
rg-edit regexp: llama_grammar_advance_stack
rg-edit extra-args: -A20
rg-edit directive: """Rewrite: fix the following segfault:
[..]
⚫ Testing segfault. Grammar:
root ::= ( [x]* )*
root ::= ( [x]* )*
Segmentation fault build/bin/test-grammar-integration"""
gptel-context:
(("~/llama.cpp/src/llama-grammar.cpp")
("~/llama.cpp/tests/test-grammar-integration.cpp")
("~/llama.cpp/grammars/./list.gbnf")
("~/llama.cpp/grammars/./json_arr.gbnf")
("~/llama.cpp/grammars/./json.gbnf")
("~/llama.cpp/grammars/./japanese.gbnf")
("~/llama.cpp/grammars/./english.gbnf")
("~/llama.cpp/grammars/./chess.gbnf")
("~/llama.cpp/grammars/./c.gbnf")
("~/llama.cpp/grammars/./arithmetic.gbnf")
("~/llama.cpp/grammars/./README.md"))
* grammar: convert recursive llama_grammar_advance_stack to iterative
This change converts the function to an iterative approach using
explicit stacks, which prevents deep recursion and eliminates the risk
of stack overflow.
rg-edit regexp: llama_grammar_advance_stack
rg-edit extra-args: -A30
rg-edit directive: """Rewrite: fix the following segfault:
[..]
⚫ Testing segfault. Grammar:
root ::= ( [x]* )*
root ::= ( [x]* )*
Segmentation fault build/bin/test-grammar-integration
convert from recursive to interactive"""
gptel-context:
(("~/llama.cpp/src/llama-grammar.cpp")
("~/llama.cpp/tests/test-grammar-integration.cpp")
("~/llama.cpp/grammars/./list.gbnf")
("~/llama.cpp/grammars/./json_arr.gbnf")
("~/llama.cpp/grammars/./json.gbnf")
("~/llama.cpp/grammars/./japanese.gbnf")
("~/llama.cpp/grammars/./english.gbnf")
("~/llama.cpp/grammars/./chess.gbnf")
("~/llama.cpp/grammars/./c.gbnf")
("~/llama.cpp/grammars/./arithmetic.gbnf")
("~/llama.cpp/grammars/./README.md"))
v2: Added a `std::set` to perform tree-based lookups with O(N log N)
complexity. Testing with a parallel run of `test-grammar-integration`
shows a double-digit percentage increase in runtime. An
`unordered_set` with O(1) hashing was also evaluated, but the overhead
of constructing hash keys from pointers made it significantly slower
than the rbtree implementation that only requires an ordering
operator. The performance regression in the test suite appears
justified by the overall reduction in algorithmic complexity.
Co-developed-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com>
* grammar: add test case for hang in repetition grammar processing
This commit adds a new test case to the grammar integration tests that
specifically targets a hang scenario in the repetition grammar parser
found while adding GBNF support to ripgrep-edit.
llama-server reproducer:
curl \
-X POST \
-d '{
"messages": [{ "role": "user", "content": "write yes" }],
"grammar": "root ::= (([^x]*){0,99}){0,99}"
}' \
-H "Content-Type: application/json" \
http://localhost:8811/v1/chat/completions
* grammar: add repetition threshold check
The change introduces a maximum repetition threshold to avoid
excessive rule expansion during grammar parsing. When parsing
repetition patterns like {m,n}, the parser now calculates the
potential number of rules that would be generated and throws an error
if the product of previous rules and new rules exceeds the threshold.
A test case was added to verify the threshold is properly enforced for
deeply nested repetition patterns that would otherwise cause hangs.
1493 lines
41 KiB
C++
1493 lines
41 KiB
C++
#ifdef NDEBUG
|
|
#undef NDEBUG
|
|
#endif
|
|
|
|
#include "json-schema-to-grammar.h"
|
|
|
|
#include "../src/unicode.h"
|
|
#include "../src/llama-grammar.h"
|
|
|
|
#include <nlohmann/json.hpp>
|
|
|
|
#include <cassert>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
using json = nlohmann::ordered_json;
|
|
|
|
// Builds a grammar from the GBNF text in `grammar_str`, using `grammar_root` as the
// name of the start rule. The result of llama_grammar_init_impl() is returned as-is;
// callers in this file treat a nullptr result as "the grammar could not be built".
static llama_grammar * build_grammar_with_root(const std::string & grammar_str, const char * grammar_root) {
    const char * gbnf_text = grammar_str.c_str();
    llama_grammar * built = llama_grammar_init_impl(nullptr, gbnf_text, grammar_root, false, nullptr, 0, nullptr, 0);
    return built;
}
|
|
|
|
// Convenience wrapper: builds `grammar_str` with the conventional start rule "root".
static llama_grammar * build_grammar(const std::string & grammar_str) {
    static const char * const default_root = "root";
    return build_grammar_with_root(grammar_str, default_root);
}
|
|
|
|
static bool test_build_grammar_fails(const std::string & grammar_str) {
|
|
fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
|
|
bool grammar_fails = false;
|
|
llama_grammar * grammar = build_grammar(grammar_str);
|
|
if (grammar != nullptr) {
|
|
fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
|
|
} else {
|
|
grammar_fails = true;
|
|
fprintf(stdout, " ✅︎\n");
|
|
}
|
|
return grammar_fails;
|
|
}
|
|
|
|
struct token_and_piece {
|
|
llama_token token;
|
|
std::string piece;
|
|
};
|
|
|
|
// token() encodes a 32-bit ID as 5 bytes: a 0xff marker followed by the ID in big-endian order.
|
|
static std::string token(llama_token id) {
|
|
return std::string{
|
|
static_cast<char>(0xff),
|
|
static_cast<char>((id >> 24) & 0xff),
|
|
static_cast<char>((id >> 16) & 0xff),
|
|
static_cast<char>((id >> 8) & 0xff),
|
|
static_cast<char>(id & 0xff)
|
|
};
|
|
}
|
|
|
|
// parse_tokens() parses the token encodes above and UTF-8 text.
|
|
static std::vector<token_and_piece> parse_tokens(const std::string & input) {
|
|
std::vector<token_and_piece> result;
|
|
result.reserve(input.size());
|
|
size_t offset = 0;
|
|
while (offset < input.size()) {
|
|
try {
|
|
if (static_cast<unsigned char>(input[offset]) == 0xff) {
|
|
if (offset + 5 > input.size()) {
|
|
throw std::runtime_error("not enough bytes for token id");
|
|
}
|
|
uint32_t val =
|
|
(static_cast<unsigned char>(input[offset + 1]) << 24) |
|
|
(static_cast<unsigned char>(input[offset + 2]) << 16) |
|
|
(static_cast<unsigned char>(input[offset + 3]) << 8) |
|
|
(static_cast<unsigned char>(input[offset + 4]));
|
|
auto piece = "<[" + std::to_string(val) + "]>";
|
|
result.push_back({static_cast<llama_token>(val), piece});
|
|
offset += 5;
|
|
} else {
|
|
uint32_t cpt = unicode_cpt_from_utf8(input, offset);
|
|
result.push_back({0, unicode_cpt_to_utf8(cpt)});
|
|
}
|
|
} catch (const std::invalid_argument & /*ex*/) {
|
|
// Silently ignore invalid UTF-8 input to avoid leaking the exception beyond llama_tokenize
|
|
++offset;
|
|
result.push_back({0, unicode_cpt_to_utf8(0xFFFD)}); // replacement character
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static bool match_string(const std::string & input, llama_grammar * grammar) {
|
|
const auto parsed = parse_tokens(input);
|
|
|
|
auto & stacks_cur = llama_grammar_get_stacks(grammar);
|
|
|
|
for (const auto & in : parsed) {
|
|
try {
|
|
llama_grammar_accept_token(*grammar, in.token, in.piece);
|
|
} catch (const std::runtime_error & /*e*/) {
|
|
// normally this shouldn't get hit because of llama_grammar_apply
|
|
return false;
|
|
}
|
|
|
|
if (stacks_cur.empty()) {
|
|
// no stacks means that the grammar failed to match at this point
|
|
return false;
|
|
}
|
|
}
|
|
|
|
for (const auto & stack : stacks_cur) {
|
|
if (stack.empty()) {
|
|
// An empty stack means that the grammar has been completed
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
|
|
fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
|
|
fflush(stderr);
|
|
|
|
auto * grammar = build_grammar(grammar_str);
|
|
|
|
// Save the original grammar stacks so that we can reset after every new string we want to test
|
|
const llama_grammar_stacks stacks_org = llama_grammar_get_stacks(grammar); // copy
|
|
|
|
llama_grammar_stacks & stacks_cur = llama_grammar_get_stacks(grammar);
|
|
|
|
fprintf(stderr, " 🔵 Valid strings:\n");
|
|
|
|
// Passing strings
|
|
for (const auto & test_string : passing_strings) {
|
|
fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
fflush(stderr);
|
|
|
|
bool matched = match_string(test_string, grammar);
|
|
|
|
if (!matched) {
|
|
fprintf(stderr, "❌ (failed to match)\n");
|
|
|
|
// DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
|
|
// DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
|
|
FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
|
|
if (grammar_file) {
|
|
fprintf(grammar_file, "%s", grammar_str.c_str());
|
|
fclose(grammar_file);
|
|
}
|
|
|
|
// DEBUG: Write the test string to test-grammar-integration.string.txt
|
|
FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
|
|
if (string_file) {
|
|
fprintf(string_file, "%s", test_string.c_str());
|
|
fclose(string_file);
|
|
}
|
|
|
|
fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
|
|
} else {
|
|
fprintf(stdout, "✅︎\n");
|
|
}
|
|
|
|
assert(matched);
|
|
|
|
// Reset the grammar stacks
|
|
stacks_cur = stacks_org;
|
|
}
|
|
|
|
fprintf(stderr, " 🟠 Invalid strings:\n");
|
|
|
|
// Failing strings
|
|
for (const auto & test_string : failing_strings) {
|
|
fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
fflush(stderr);
|
|
|
|
bool matched = match_string(test_string, grammar);
|
|
|
|
if (matched) {
|
|
fprintf(stderr, "❌ (incorrectly matched)\n");
|
|
} else {
|
|
fprintf(stdout, "✅︎\n");
|
|
}
|
|
assert(!matched);
|
|
|
|
// Reset the grammar stacks
|
|
stacks_cur = stacks_org;
|
|
}
|
|
|
|
// Clean up allocated memory
|
|
llama_grammar_free_impl(grammar);
|
|
}
|
|
// Runs test() on a GBNF grammar given directly as text. The grammar text is appended
// to the description so that it shows up in the log header.
static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
    const std::string full_desc = test_desc + ". Grammar: " + grammar_str;
    test(full_desc, grammar_str, passing_strings, failing_strings);
}
|
|
// Runs test() on a JSON schema: the schema text is parsed as JSON, converted to a
// grammar with json_schema_to_grammar(), and the resulting grammar is what gets tested.
// The original schema text is appended to the description for the log header.
static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
    const std::string full_desc = test_desc + ". Schema: " + schema_str;
    test(full_desc, json_schema_to_grammar(json::parse(schema_str), true), passing_strings, failing_strings);
}
|
|
|
|
static void test_simple_grammar() {
|
|
test_schema(
|
|
"min 0",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 0
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"0",
|
|
"10",
|
|
"12",
|
|
"10000",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-1",
|
|
"-10",
|
|
"-10000",
|
|
"-100000000000000000000000000000000",
|
|
"100000000000000000000000000000000",
|
|
"00",
|
|
"01",
|
|
"-0",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 2",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 2
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"2",
|
|
"3",
|
|
"4",
|
|
"10",
|
|
"20",
|
|
"1234567890000000",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"1",
|
|
"-1",
|
|
"-100",
|
|
"0",
|
|
"1",
|
|
"01",
|
|
"02",
|
|
"12345678900000000",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 456",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 456
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"456",
|
|
"4560",
|
|
"457",
|
|
"460",
|
|
"500",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"455",
|
|
"356",
|
|
"50",
|
|
"050",
|
|
"-1",
|
|
"-456",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -123",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -123
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-123",
|
|
"-122",
|
|
"-11",
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
"123",
|
|
"1234",
|
|
"2345",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-1234",
|
|
"-124",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"max 9999",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"maximum": 9999
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-99999",
|
|
"0",
|
|
"9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"10000",
|
|
"99991",
|
|
}
|
|
);
|
|
test_schema(
|
|
"max -9999",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"maximum": -9999
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-10000",
|
|
"-9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-9998",
|
|
"0",
|
|
"9999",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 5 max 30",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 5,
|
|
"maximum": 30
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"5",
|
|
"10",
|
|
"30",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"05",
|
|
"4",
|
|
"-1",
|
|
"31",
|
|
"123",
|
|
"0123",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 1 max 900719925474091",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0,
|
|
"maximum": 900719925474091
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"1",
|
|
"2",
|
|
"10",
|
|
"900719925474090",
|
|
"900719925474091",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"01",
|
|
"900719925474092",
|
|
"9007199254740910",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -1 max 1",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -1,
|
|
"maximum": 1
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-11",
|
|
"-10",
|
|
"-2",
|
|
"2",
|
|
"10",
|
|
"11",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -123 max 42",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -123,
|
|
"maximum": 42
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-123",
|
|
"-122",
|
|
"-13",
|
|
"-11",
|
|
"-2",
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
"5",
|
|
"10",
|
|
"39",
|
|
"40",
|
|
"42",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-0123",
|
|
"-124",
|
|
"-1123",
|
|
"-200",
|
|
"43",
|
|
"123",
|
|
"0123",
|
|
}
|
|
);
|
|
test_schema(
|
|
"exclusive min / max",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0,
|
|
"exclusiveMaximum": 10000
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"1",
|
|
"9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"01",
|
|
"10000",
|
|
"99999",
|
|
}
|
|
);
|
|
|
|
// Test case for a simple grammar
|
|
test_grammar(
|
|
"simple grammar",
|
|
R"""(
|
|
root ::= expr
|
|
expr ::= term ("+" term)*
|
|
term ::= number
|
|
number ::= [0-9]+)""",
|
|
// Passing strings
|
|
{
|
|
"42",
|
|
"1+2+3+4+5",
|
|
"123+456",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"+",
|
|
"/ 3",
|
|
"1+2+3+4+5+",
|
|
"12a45",
|
|
}
|
|
);
|
|
|
|
// Test case for a simple grammar with tokens
|
|
test_grammar(
|
|
"simple grammar with tokens",
|
|
R"""(
|
|
root ::= <[10]> content <[11]>
|
|
content ::= (!<[11]>)*)""",
|
|
// Passing strings
|
|
{
|
|
token(10) + "hello world" + token(11),
|
|
token(10) + "text with " + token(12) + " other tokens " + token(13) + " mixed in" + token(11),
|
|
token(10) + token(11),
|
|
token(10) + token(12) + token(13) + token(14) + token(15) + token(11),
|
|
token(10) + "a" + token(11),
|
|
},
|
|
// Failing strings
|
|
{
|
|
token(10) + "missing end token",
|
|
token(10),
|
|
"missing start token" + token(11),
|
|
token(10) + token(11) + token(11), // double end token
|
|
token(11) + "wrong order" + token(10),
|
|
}
|
|
);
|
|
}
|
|
|
|
static void test_complex_grammar() {
|
|
// Test case for a more complex grammar, with both failure strings and success strings
|
|
test_grammar(
|
|
"medium complexity grammar",
|
|
// Grammar
|
|
R"""(
|
|
root ::= expression
|
|
expression ::= term ws (("+"|"-") ws term)*
|
|
term ::= factor ws (("*"|"/") ws factor)*
|
|
factor ::= number | variable | "(" expression ")" | function-call
|
|
number ::= [0-9]+
|
|
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
|
|
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
|
|
ws ::= [ \t\n\r]?)""",
|
|
// Passing strings
|
|
{
|
|
"42",
|
|
"1*2*3*4*5",
|
|
"x",
|
|
"x+10",
|
|
"x1+y2",
|
|
"(a+b)*(c-d)",
|
|
"func()",
|
|
"func(x,y+2)",
|
|
"a*(b+c)-d/e",
|
|
"f(g(x),h(y,z))",
|
|
"x + 10",
|
|
"x1 + y2",
|
|
"(a + b) * (c - d)",
|
|
"func()",
|
|
"func(x, y + 2)",
|
|
"a * (b + c) - d / e",
|
|
"f(g(x), h(y, z))",
|
|
"123+456",
|
|
"123*456*789-123/456+789*123",
|
|
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
|
|
},
|
|
// Failing strings
|
|
{
|
|
"+",
|
|
"/ 3x",
|
|
"x + + y",
|
|
"a * / b",
|
|
"func(,)",
|
|
"func(x y)",
|
|
"(a + b",
|
|
"x + y)",
|
|
"a + b * (c - d",
|
|
"42 +",
|
|
"x +",
|
|
"x + 10 +",
|
|
"(a + b) * (c - d",
|
|
"func(",
|
|
"func(x, y + 2",
|
|
"a * (b + c) - d /",
|
|
"f(g(x), h(y, z)",
|
|
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
|
|
}
|
|
);
|
|
|
|
// Test case for a more complex grammar with tokens
|
|
test_grammar(
|
|
"complex grammar with tokens",
|
|
R"""(
|
|
root ::= reasoning+ content tool-call*
|
|
reasoning ::= <[10]> (!<[11]>)* <[11]>
|
|
content ::= <[20]> (!<[21]>)* <[21]>
|
|
tool-call ::= <[12]> name <[13]> args <[14]>
|
|
name ::= (!<[13]>)+
|
|
args ::= (!<[14]>)*)""",
|
|
// Passing strings
|
|
{
|
|
token(10) + "I am thinking" + token(11) + token(20) + "hello world!" + token(21) + token(12) + "search" + token(13) + "query=test" + token(14),
|
|
token(10) + "reasoning 1" + token(11) + token(10) + "reasoning 2" + token(11) + token(20) + token(21) + token(12) + "tool" + token(13) + token(14),
|
|
token(10) + token(11) + token(20) + "content" + token(21),
|
|
token(10) + "think" + token(12) + " nested" + token(11) + token(20) + token(10) + "more content" + token(21) + token(12) + "fn" + token(13) + "x=1,y=2" + token(14) + token(12) + "fn2" + token(13) + token(14),
|
|
token(10) + "reasoning" + token(11) + token(10) + "more" + token(11) + token(10) + "even more" + token(11) + token(20) + "text" + token(21) + token(12) + "a" + token(13) + "b" + token(14) + token(12) + "c" + token(13) + "d" + token(14),
|
|
},
|
|
// Failing strings
|
|
{
|
|
token(20) + "content only" + token(21),
|
|
token(10) + "no closing reasoning",
|
|
token(10) + token(11) + token(20) + "no closing content",
|
|
token(10) + token(11) + token(20) + token(21) + token(12) + "incomplete tool",
|
|
token(10) + token(11) + token(11) + token(20) + token(21),
|
|
}
|
|
);
|
|
}
|
|
|
|
// Exercises the "." wildcard: exactly three arbitrary characters on each side of "abc".
static void test_special_chars() {
    // A collection of tests to exercise special characters such as "."
    test_grammar(
        "special characters",
        // Grammar
        R"""(
root ::= ... "abc" ...
)""",
        // Passing strings
        {
            "abcabcabc",
            "aaaabcccc",
            // NOTE: Also ensures that multi-byte characters still count as a single character
            "🔵🟠✅abc❌🟠🔵"
        },
        // Failing strings
        {
            "aaabcccc",
            "aaaaabcccc",
            "aaaabccc",
            "aaaabccccc",
            "🔵🟠✅❌abc❌✅🟠🔵",
            "🔵🟠abc🟠🔵"
        }
    );
}
|
|
|
|
// Quantifier coverage: the *, + and ? operators, the {n}, {n,}, {0,n} and {m,n}
// repetition forms, and a nested-star grammar over a nullable symbol ("segfault").
static void test_quantifiers() {
    // A collection of tests to exercise * + and ? quantifiers

    test_grammar(
        "* quantifier",
        // Grammar
        R"""(root ::= "a"*)""",
        // Passing strings
        {
            "",
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        // Failing strings
        {
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "+ quantifier",
        // Grammar
        R"""(root ::= "a"+)""",
        // Passing strings
        {
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        // Failing strings
        {
            "",
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "? quantifier",
        // Grammar
        R"""(root ::= "a"?)""",
        // Passing strings
        {
            "",
            "a"
        },
        // Failing strings
        { "b", "ab", "aa", "ba", }
    );
    test_grammar(
        "mixed quantifiers",
        // Grammar
        R"""(
root ::= cons+ vowel* cons? (vowel cons)*
vowel ::= [aeiouy]
cons ::= [bcdfghjklmnpqrstvwxyz]
)""",
        // Passing strings
        { "yes", "no", "noyes", "crwth", "four", "bryyyy", },
        // Failing strings
        { "yess", "yesno", "forty", "catyyy", }
    );
    test_grammar(
        "simple exact repetition",
        // Grammar
        R"""(
root ::= [ab]{4}
)""",
        // Passing strings
        { "aaaa", "bbbb", "abab", },
        // Failing strings
        { "a", "b", "aaaaa", }
    );
    test_grammar(
        "simple min repetition",
        // Grammar
        R"""(
root ::= [ab]{4,}
)""",
        // Passing strings
        { "aaaa", "aaaaab", "bbbb", "ababab", },
        // Failing strings
        { "", "aba", }
    );
    test_grammar(
        "simple max repetition",
        // Grammar
        R"""(
root ::= [ab]{0,4}
)""",
        // Passing strings
        { "", "a", "aa", "aaa", "aaab", },
        // Failing strings
        { "aaaaa", }
    );
    test_grammar(
        "min / max repetition",
        // Grammar
        R"""(
root ::= ("0x" [A-F0-9]{2} " "?){3,5}
)""",
        // Passing strings
        {
            "0xFF 0x12 0xAB",
            "0xFF 0x12 0xAB 0x00 0x00",
        },
        // Failing strings
        {
            "",
            "0xFF",
            "0xFF 0x12",
            "0xFF 0x12 0xAB 0x00 0x00 0x00",
        }
    );
    // Nested star over a symbol that can match the empty string.
    test_grammar(
        "segfault",
        // Grammar
        R"""(
root ::= ( [x]* )*
)""",
        // Passing strings
        {
            "",
            "x",
            "xx"
        },
        // Failing strings
        {
            "y",
            "yy"
        }
    );
}
|
|
|
|
// Parses a grammar whose start rule is misspelled ("rot") and checks that parsing still
// yields rules while no symbol named "root" gets registered.
static void test_failure_missing_root() {
    fprintf(stderr, "⚫ Testing missing root node:\n");

    // Test case for a grammar that is missing a root rule
    const std::string grammar_str = R"""(
rot ::= expr
expr ::= term ("+" term)*
term ::= number
number ::= [0-9]+)""";

    llama_grammar_parser parsed_grammar;
    parsed_grammar.parse(grammar_str.c_str());

    // The text itself is well-formed, so rules must have been produced...
    const bool has_rules = !parsed_grammar.rules.empty();
    assert(has_rules);

    // ...but there must be no symbol called "root".
    const bool root_absent = parsed_grammar.symbol_ids.find("root") == parsed_grammar.symbol_ids.end();
    assert(root_absent);

    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
// Parses a grammar that references an undefined rule ("numero") and checks that the
// parser ends up with no rules. The surrounding "Expected error" lines bracket whatever
// the parser itself prints while rejecting the grammar.
static void test_failure_missing_reference() {
    fprintf(stderr, "⚫ Testing missing reference node:\n");

    // Test case for a grammar that is missing a referenced rule
    const std::string grammar_str =
        R"""(root ::= expr
expr ::= term ("+" term)*
term ::= numero
number ::= [0-9]+)""";

    fprintf(stderr, " Expected error: ");

    llama_grammar_parser parsed_grammar;
    parsed_grammar.parse(grammar_str.c_str());

    // Ensure the grammar was NOT parsed successfully
    const bool rejected = parsed_grammar.rules.empty();
    assert(rejected);

    fprintf(stderr, " End of expected error.\n");
    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
// Checks that left-recursive grammars are rejected at build time, from the direct case
// up to recursion hidden behind another rule and behind a rule that can be empty.
static void test_failure_left_recursion() {
    fprintf(stderr, "⚫ Testing left recursion detection:\n");

    // Ordered from simplest to most indirect; each one must fail to build.
    const std::string left_recursive_grammars[] = {
        // Simple: root refers to itself in leftmost position
        R"""(root ::= "a" | root "a")""",

        // More complicated: the recursion sits one rule below root
        R"""(
root ::= asdf
asdf ::= "a" | asdf "a"
)""",

        // Even more complicated: the recursion goes through a second rule
        R"""(
root ::= asdf
asdf ::= "a" | foo "b"
foo ::= "c" | asdf "d" | "e")""",

        // Yet even more complicated: the recursion is preceded by a rule that may be empty
        R"""(
root ::= asdf
asdf ::= "a" | foo "b"
foo ::= "c" | empty asdf "d" | "e"
empty ::= "blah" | )""",
    };

    for (const std::string & grammar_str : left_recursive_grammars) {
        const bool rejected = test_build_grammar_fails(grammar_str);
        assert(rejected);
    }

    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
static void test_failure_missing_root_symbol() {
|
|
fprintf(stderr, "⚫ Testing missing root symbol:\n");
|
|
|
|
const std::string grammar_str = R"""(
|
|
root ::= "foobar"
|
|
)""";
|
|
|
|
llama_grammar * failure_result = build_grammar_with_root(grammar_str, "nonexistent");
|
|
assert(failure_result == nullptr);
|
|
|
|
fprintf(stderr, " ✅︎ Passed\n");
|
|
}
|
|
|
|
static void test_custom_root_symbol_check() {
|
|
fprintf(stderr, "⚫ Testing custom root symbol check:\n");
|
|
|
|
const std::string custom_root_grammar_str = R"""(
|
|
foobar ::= "foobar"
|
|
)""";
|
|
|
|
llama_grammar * failure_result = build_grammar_with_root(custom_root_grammar_str, "root");
|
|
assert(failure_result == nullptr);
|
|
|
|
llama_grammar * success_result = build_grammar_with_root(custom_root_grammar_str, "foobar");
|
|
assert(success_result != nullptr);
|
|
llama_grammar_free_impl(success_result);
|
|
|
|
fprintf(stderr, " ✅︎ Passed\n");
|
|
}
|
|
|
|
static void test_json_schema() {
|
|
// Note that this is similar to the regular grammar tests,
|
|
// but we convert each json schema to a grammar before parsing.
|
|
// Otherwise, this test structure is the same.
|
|
|
|
test_schema(
|
|
"empty schema (object)",
|
|
// Schema
|
|
R"""(
|
|
{}
|
|
)""",
|
|
// Passing strings
|
|
{
|
|
R"""({})""",
|
|
R"""({"foo": "bar"})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"",
|
|
"[]",
|
|
"null",
|
|
R"""("")""",
|
|
"true",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"exotic formats (list)",
|
|
// Schema
|
|
R"""({
|
|
"items": [
|
|
{ "format": "date" },
|
|
{ "format": "uuid" },
|
|
{ "format": "time" },
|
|
{ "format": "date-time" }
|
|
]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
// "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
// "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
|
|
//R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
//R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""(["foo", "bar"])""",
|
|
R"""(["12345678-1234-1234-1234-1234567890ab"])""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string",
|
|
// Schema
|
|
R"""({
|
|
"type": "string"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({})""",
|
|
R"""("foo": "bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min length 1",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 1
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""({})""",
|
|
R"""("foo": "bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min length 3",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 3
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("foobar")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("f")""",
|
|
R"""("fo")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ max length",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"maxLength": 3
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("")""",
|
|
R"""("f")""",
|
|
R"""("fo")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("foobar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min & max length",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 1,
|
|
"maxLength": 4
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("f")""",
|
|
R"""("barf")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("barfo")""",
|
|
R"""("foobar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"boolean",
|
|
// Schema
|
|
R"""({
|
|
"type": "boolean"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"true",
|
|
"false",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("true")""",
|
|
R"""(True)""",
|
|
R"""(FALSE)""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"integer",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""(0)""",
|
|
R"""(12345)""",
|
|
R"""(1234567890123456)""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(01)""",
|
|
R"""(007)""",
|
|
R"""(12345678901234567 )""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string const",
|
|
// Schema
|
|
R"""({
|
|
"const": "foo"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""(foo)""",
|
|
R"""("bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"non-string const",
|
|
// Schema
|
|
R"""({
|
|
"const": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""(true)""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(foo)""",
|
|
R"""("true")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"non-string const",
|
|
// Schema
|
|
R"""({
|
|
"enum": ["red", "amber", "green", null, 42, ["foo"]]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("red")""",
|
|
R"""(null)""",
|
|
R"""(42)""",
|
|
R"""(["foo"])""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(420)""",
|
|
R"""(true)""",
|
|
R"""(foo)""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"simple pattern",
|
|
// Schema
|
|
R"""({
|
|
"pattern": "^[a-zA-Z0-9_-]*$"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("He_llo-12")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("!")""",
|
|
R"""("Hello World")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"pattern with escapes",
|
|
// Schema
|
|
R"""({
|
|
"pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("a^$.[]()|{}*+?b")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("ab")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"",
|
|
// Schema
|
|
R"""(
|
|
{
|
|
"type": ["array", "null"],
|
|
"items": { "type": "string" }
|
|
}
|
|
)""",
|
|
// Passing strings
|
|
{
|
|
"null",
|
|
"[]",
|
|
"[\"123\"]",
|
|
"[\"foo\", \"bar\"]",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"",
|
|
"[123]",
|
|
"\"foo\"",
|
|
"[\"foo\", 42]",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"min+max items",
|
|
// Schema
|
|
R"""({
|
|
"items": {
|
|
"type": ["number", "integer"]
|
|
},
|
|
"minItems": 3,
|
|
"maxItems": 5
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""([1, 2, 3])""",
|
|
R"""([1, 2, 3, 4])""",
|
|
R"""([1, 2, 3, 4, 5])""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""([1, 2])""",
|
|
R"""([1, 2, 3, 4, 5, 6])""",
|
|
R"""(1)""",
|
|
}
|
|
);
|
|
|
|
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
test_schema(
|
|
"object properties",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
}
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// "By default, leaving out properties is valid"
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
// "By extension, even an empty object is valid"
|
|
R"""({})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Change datatype from number to string
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Reorder properties
|
|
R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
|
|
// Same datatype change as above (number given as a string); properties are in schema order
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// "Additional properties default to false for generation, even though the spec says true."
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"additional properties can't override other properties",
|
|
R"""({
|
|
"properties": {
|
|
"a": {"type": "integer"},
|
|
"b": {"type": "integer"}
|
|
},
|
|
"additionalProperties": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"a": 42})""",
|
|
R"""({"c": ""})""",
|
|
R"""({"a": 42, "c": ""})""",
|
|
R"""({"a_": ""})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""({"a": ""})""",
|
|
R"""({"a": "", "b": ""})""",
|
|
}
|
|
);
|
|
|
|
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
test_schema(
|
|
"object properties, additionalProperties: true",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
},
|
|
"additionalProperties": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
// "By extension, even an empty object is valid"
|
|
R"""({})""",
|
|
R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
|
|
// "By default, leaving out properties is valid"
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
// "By default, providing additional properties is valid"
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Change datatype from number to string
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Reorder properties
|
|
R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
|
|
}
|
|
);
|
|
|
|
// Additional properties: false
|
|
test_schema(
|
|
"required + optional props each in original order",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
},
|
|
"additionalProperties": false
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_type":"Avenue"})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Spaces are permitted around enum values
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Reorder properties
|
|
R"""({ "street_type": "Avenue", "number": 1600 })""",
|
|
// Add "direction"
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"required + optional props each in original order",
|
|
// Schema
|
|
R"""({
|
|
"properties": {
|
|
"b": {"type": "string"},
|
|
"a": {"type": "string"},
|
|
"d": {"type": "string"},
|
|
"c": {"type": "string"}
|
|
},
|
|
"required": ["a", "b"],
|
|
"additionalProperties": false
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"b": "foo", "a": "bar"})""",
|
|
R"""({"b":"foo","a":"bar","d":"qux"})""",
|
|
R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({"a": "foo", "b": "bar"})""",
|
|
R"""({"b": "bar"})""",
|
|
R"""({"a": "foo", "c": "baz"})""",
|
|
R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
|
|
}
|
|
);
|
|
|
|
// NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
|
|
test_schema(
|
|
"required props",
|
|
// Schema
|
|
R"""({
|
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
"$id": "https://example.com/product.schema.json",
|
|
"title": "Product",
|
|
"description": "A product from Acme's catalog",
|
|
"type": "object",
|
|
"properties": {
|
|
"productId": {
|
|
"description": "The unique identifier for a product",
|
|
"type": "integer"
|
|
},
|
|
"productName": {
|
|
"description": "Name of the product",
|
|
"type": "string"
|
|
},
|
|
"price": {
|
|
"description": "The price of the product",
|
|
"type": "number",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"tags": {
|
|
"description": "Tags for the product",
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"minItems": 1,
|
|
"uniqueItems": true
|
|
},
|
|
"dimensions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"length": {
|
|
"type": "number"
|
|
},
|
|
"width": {
|
|
"type": "number"
|
|
},
|
|
"height": {
|
|
"type": "number"
|
|
}
|
|
},
|
|
"required": [ "length", "width", "height" ]
|
|
}
|
|
},
|
|
"required": [ "productId", "productName", "price" ]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({})""", // Missing all required properties
|
|
R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
|
|
// TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
|
|
// Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
|
|
// R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
|
|
R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
|
|
R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
|
|
// TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
|
|
// R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
|
|
}
|
|
);
|
|
}
|
|
|
|
// Program entry point for the grammar integration test binary.
//
// Prints a start banner, invokes each test group once in the fixed order
// listed below, then prints a completion banner and returns 0.
//
// NOTE(review): the completion banner and the zero return are unconditional
// here, so a failing test group presumably aborts the process itself before
// control returns to main -- confirm against the test helpers.
int main() {
    fputs("Running grammar integration tests...\n", stdout);

    test_simple_grammar();
    test_complex_grammar();
    test_special_chars();
    test_quantifiers();

    test_failure_missing_root();
    test_failure_missing_reference();
    test_failure_left_recursion();
    test_failure_missing_root_symbol();

    test_custom_root_symbol_check();
    test_json_schema();

    fputs("All tests passed.\n", stdout);
    return 0;
}
|