common : gracefully handle incomplete output (#20191)

* common : handle incomplete UTF-8 at end of input in PEG parser

* cont : if reached end prematurely, emit needs_more_input to propagate partial output

* cont: refactor peg parse context to add lenient flag

* cont : remove partial flag, keep lenient flag
This commit is contained in:
Aldehir Rojas 2026-03-08 11:17:02 -05:00 committed by GitHub
parent 9b24886f78
commit 451ef08432
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 139 additions and 125 deletions

View file

@ -120,7 +120,7 @@ void test_basic(testing & t) {
return p.literal("hello") + p.optional(p.literal(" world"));
});
auto ctx = common_peg_parse_context("hello", false);
auto ctx = common_peg_parse_context("hello");
auto result = parser.parse(ctx);
t.assert_equal("optional_absent", true, result.success());
t.assert_equal("optional_absent_end", 5u, result.end);
@ -132,7 +132,7 @@ void test_basic(testing & t) {
return p.literal("hello") + p.optional(p.literal(" world"));
});
auto ctx = common_peg_parse_context("hello ", true);
auto ctx = common_peg_parse_context("hello ", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("partial_match_need_more", true, result.need_more_input());
});
@ -215,7 +215,7 @@ void test_basic(testing & t) {
t.test("sequence_partial_match_1", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
auto ctx = common_peg_parse_context("<thi", true);
auto ctx = common_peg_parse_context("<thi", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("sequence_partial_match_1", true, result.need_more_input());
});
@ -224,7 +224,7 @@ void test_basic(testing & t) {
t.test("sequence_partial_match_2", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("begin") + p.literal("end"); });
auto ctx = common_peg_parse_context("begin", true);
auto ctx = common_peg_parse_context("begin", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("sequence_partial_match_2", true, result.need_more_input());
});
@ -233,7 +233,7 @@ void test_basic(testing & t) {
t.test("sequence_partial_match_3", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
auto ctx = common_peg_parse_context("<think></", true);
auto ctx = common_peg_parse_context("<think></", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("sequence_partial_match_3", true, result.need_more_input());
});
@ -242,7 +242,7 @@ void test_basic(testing & t) {
t.test("sequence_full_match", [&](testing & t) {
auto common_chat_combinator_parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("hello") + p.literal("world"); });
auto ctx = common_peg_parse_context("helloworld", false);
auto ctx = common_peg_parse_context("helloworld");
auto result = common_chat_combinator_parser.parse(ctx);
t.assert_equal("sequence_full_match", true, result.success());
});
@ -251,7 +251,7 @@ void test_basic(testing & t) {
t.test("sequence_no_match", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("<think>") + p.literal("</think>"); });
auto ctx = common_peg_parse_context("<think>I am common_chat_combinator_parser", true);
auto ctx = common_peg_parse_context("<think>I am common_chat_combinator_parser", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("sequence_no_match", true, result.fail());
});
@ -260,7 +260,7 @@ void test_basic(testing & t) {
t.test("choices_partial_match_1", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("option1") | p.literal("option2"); });
auto ctx = common_peg_parse_context("opt", true);
auto ctx = common_peg_parse_context("opt", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("choices_partial_match_1", true, result.need_more_input());
});
@ -270,7 +270,7 @@ void test_basic(testing & t) {
auto parser =
build_peg_parser([](common_peg_parser_builder & p) { return p.literal("choice_a") | p.literal("choice_b"); });
auto ctx = common_peg_parse_context("choice", true);
auto ctx = common_peg_parse_context("choice", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("choices_partial_match_2", true, result.need_more_input());
});
@ -279,7 +279,7 @@ void test_basic(testing & t) {
t.test("choices_full_match_1", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("first") | p.literal("second"); });
auto ctx = common_peg_parse_context("first", false);
auto ctx = common_peg_parse_context("first");
auto result = parser.parse(ctx);
t.assert_equal("choices_full_match_1", true, result.success());
});
@ -288,7 +288,7 @@ void test_basic(testing & t) {
t.test("choices_full_match_2", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("alpha") | p.literal("beta"); });
auto ctx = common_peg_parse_context("beta", false);
auto ctx = common_peg_parse_context("beta");
auto result = parser.parse(ctx);
t.assert_equal("choices_full_match_2", true, result.success());
});
@ -297,7 +297,7 @@ void test_basic(testing & t) {
t.test("choices_no_match", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.literal("good") | p.literal("better"); });
auto ctx = common_peg_parse_context("best", false);
auto ctx = common_peg_parse_context("best");
auto result = parser.parse(ctx);
t.assert_equal("choices_no_match", true, result.fail());
});
@ -306,7 +306,7 @@ void test_basic(testing & t) {
t.test("zero_or_more_partial_match_1", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("ab")); });
auto ctx = common_peg_parse_context("a", true);
auto ctx = common_peg_parse_context("a", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("zero_or_more_partial_match_1", true, result.need_more_input());
});
@ -315,7 +315,7 @@ void test_basic(testing & t) {
t.test("zero_or_more_partial_match_2", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("xy")); });
auto ctx = common_peg_parse_context("xyx", true);
auto ctx = common_peg_parse_context("xyx", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("zero_or_more_partial_match_2", true, result.need_more_input());
});
@ -324,7 +324,7 @@ void test_basic(testing & t) {
t.test("zero_or_more_full_match", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.zero_or_more(p.literal("test")); });
auto ctx = common_peg_parse_context("test", false);
auto ctx = common_peg_parse_context("test");
auto result = parser.parse(ctx);
t.assert_equal("zero_or_more_full_match", true, result.success());
});
@ -333,7 +333,7 @@ void test_basic(testing & t) {
t.test("one_or_more_partial_match_1", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("repeat")); });
auto ctx = common_peg_parse_context("rep", true);
auto ctx = common_peg_parse_context("rep", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("one_or_more_partial_match_1", true, result.need_more_input());
});
@ -342,7 +342,7 @@ void test_basic(testing & t) {
t.test("one_or_more_partial_match_2", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("ab")); });
auto ctx = common_peg_parse_context("aba", true);
auto ctx = common_peg_parse_context("aba", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
t.assert_equal("one_or_more_partial_match_2", true, result.need_more_input());
});
@ -351,7 +351,7 @@ void test_basic(testing & t) {
t.test("one_or_more_full_match", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("single")); });
auto ctx = common_peg_parse_context("single", false);
auto ctx = common_peg_parse_context("single");
auto result = parser.parse(ctx);
t.assert_equal("one_or_more_full_match", true, result.success());
});
@ -360,7 +360,7 @@ void test_basic(testing & t) {
t.test("one_or_more_no_match", [&](testing & t) {
auto parser = build_peg_parser([](common_peg_parser_builder & p) { return p.one_or_more(p.literal("()")); });
auto ctx = common_peg_parse_context("success", false);
auto ctx = common_peg_parse_context("success");
auto result = parser.parse(ctx);
t.assert_equal("one_or_more_no_match", true, result.fail());
});
@ -376,7 +376,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("1", false);
common_peg_parse_context ctx("1");
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_success", true, result.success());
@ -390,7 +390,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("[1]", false);
common_peg_parse_context ctx("[1]");
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_success", true, result.success());
@ -404,7 +404,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("[[2]]", false);
common_peg_parse_context ctx("[[2]]");
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_success", true, result.success());
@ -418,7 +418,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("[[[3]]]", false);
common_peg_parse_context ctx("[[[3]]]");
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_success", true, result.success());
@ -432,7 +432,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("[[", true);
common_peg_parse_context ctx("[[", COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_need_more_input", true, result.need_more_input());
@ -446,7 +446,7 @@ void test_basic(testing & t) {
return p.rule("value", p.ref("number") | p.ref("list"));
});
common_peg_parse_context ctx("[a]", false);
common_peg_parse_context ctx("[a]");
auto result = value_parser.parse(ctx);
t.assert_equal("result_is_fail", true, result.fail());
@ -458,8 +458,8 @@ void test_basic(testing & t) {
return p.marker();
});
common_peg_parse_context ctx_square("[marker]", false);
common_peg_parse_context ctx_sharp("<marker>", false);
common_peg_parse_context ctx_square("[marker]");
common_peg_parse_context ctx_sharp("<marker>");
auto result_square = bracket_parser.parse(ctx_square);
auto result_sharp = bracket_parser.parse(ctx_sharp);