common : gracefully handle incomplete output (#20191)

* common : handle incomplete UTF-8 at end of input in PEG parser

* cont : if reached end prematurely, emit needs_more_input to propagate partial output

* cont : refactor peg parse context to add lenient flag

* cont : remove partial flag, keep lenient flag
This commit is contained in:
Aldehir Rojas 2026-03-08 11:17:02 -05:00 committed by GitHub
parent 9b24886f78
commit 451ef08432
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 139 additions and 125 deletions

View file

@@ -58,7 +58,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
// Assert result type matches
@@ -101,7 +101,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
// Assert result type matches
@@ -142,7 +142,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
// Assert result type matches
@@ -187,7 +187,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
// Assert result type matches
@@ -225,7 +225,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, false);
common_peg_parse_context ctx(tc.input);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -259,7 +259,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -293,7 +293,7 @@ void test_unicode(testing &t) {
std::string test_name = "case " + std::to_string(i) + ": " + hex_dump(tc.input);
t.test(test_name, [&](testing &t) {
common_peg_parse_context ctx(tc.input, false);
common_peg_parse_context ctx(tc.input);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -330,7 +330,7 @@ void test_unicode(testing &t) {
return p.sequence({p.json_string_content(), p.literal("\"")});
});
common_peg_parse_context ctx(tc.input, false);
common_peg_parse_context ctx(tc.input);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -367,7 +367,7 @@ void test_unicode(testing &t) {
return p.json_string_content();
});
common_peg_parse_context ctx(tc.input, true);
common_peg_parse_context ctx(tc.input, COMMON_PEG_PARSE_FLAG_LENIENT);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -390,9 +390,6 @@ void test_unicode(testing &t) {
// Invalid continuation byte
{std::string("\xC3\x28"), "", COMMON_PEG_PARSE_RESULT_FAIL},
// Overlong encoding (security issue)
{std::string("\xC0\x80"), "", COMMON_PEG_PARSE_RESULT_FAIL},
};
for (size_t i = 0; i < test_cases.size(); i++) {
@@ -404,7 +401,7 @@ void test_unicode(testing &t) {
return p.json_string_content();
});
common_peg_parse_context ctx(tc.input, false);
common_peg_parse_context ctx(tc.input);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);
@@ -433,7 +430,7 @@ void test_unicode(testing &t) {
return p.sequence({p.json_string_content(), p.literal("\"")});
});
common_peg_parse_context ctx(tc.input, false);
common_peg_parse_context ctx(tc.input);
auto result = parser.parse(ctx);
assert_result_equal(t, tc.expected_result, result.type);