tests: provide all non-ascii data hex encoded

- make the scanner not whitelist anything for test cases, so that all
  non-ASCII content in them is forced to be hex encoded

- update all tests using non-ascii bytes to use %hex[] sequences

Closes #17331
This commit is contained in:
Daniel Stenberg 2025-05-13 08:12:26 +02:00
parent 65444f1dc3
commit 11cad7bf32
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2
34 changed files with 93 additions and 96 deletions

View file

@ -158,11 +158,19 @@ while(my $filename = <$git_ls_files>) {
push @err, "content: has binary contents";
}
$content =~ s/[$non_ascii_allowed]//g;
if($filename !~ /tests\/data/) {
# the tests have no allowed UTF bytes
$content =~ s/[$non_ascii_allowed]//g;
}
if(!fn_match($filename, @non_ascii) &&
($content =~ /([\x80-\xff]+)/ && $content !~ /^(codeset-utf8|Unicode|non-ascii)/m)) {
push @err, "content: has non-ASCII: '$1'";
($content =~ /([\x80-\xff]+)/)) {
my $non = $1;
my $hex;
for my $e (split(//, $non)) {
$hex .= sprintf("%s%02x", $hex ? " ": "", ord($e));
}
push @err, "content: has non-ASCII: '$non' ($hex)";
}
if(@err) {