spacecheck.pl: check for non-ASCII chars, fix fallouts

Reported-by: James Fuller
Assisted-by: Dan Fandrich

Closes #17247
This commit is contained in:
Viktor Szakats 2025-05-03 18:11:29 +02:00
parent e1f87a093b
commit 838dc53bb7
No known key found for this signature in database
GPG key ID: B5ABD165E2AEF201
4 changed files with 38 additions and 6 deletions

View file

@ -47,6 +47,31 @@ my @space_at_eol = (
"^tests/data/test", "^tests/data/test",
); );
# UTF-8 byte sequences that are removed from file content before the
# non-ASCII check. Each entry is written as regex escapes in a single-quoted
# string, so the backslashes reach the regex engine untouched.
my @non_ascii_allowed = (
    '\xC3\xA1',          # UTF-8 for https://codepoints.net/U+00E1 LATIN SMALL LETTER A WITH ACUTE
    '\xC3\xA5',          # UTF-8 for https://codepoints.net/U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
    '\xC3\xA4',          # UTF-8 for https://codepoints.net/U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
    '\xC3\xB6',          # UTF-8 for https://codepoints.net/U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
    '\xC2\xB1',          # UTF-8 for https://codepoints.net/U+00B1 PLUS-MINUS SIGN
    '\xC2\xA7',          # UTF-8 for https://codepoints.net/U+00A7 SECTION SIGN
    '\xC3\x9F',          # UTF-8 for https://codepoints.net/U+00DF LATIN SMALL LETTER SHARP S
    '\xF0\x9F\x99\x8F',  # UTF-8 for https://codepoints.net/U+1F64F PERSON WITH FOLDED HANDS
);

# Meant for use inside a [...] character class, so it must contain nothing
# but the escapes themselves. Joining with ', ' would add comma and space to
# the class and strip those characters from the checked content as well.
my $non_ascii_allowed = join('', @non_ascii_allowed);

# File masks handed to fn_match(); files matching any of them are exempt
# from the non-ASCII check.
my @non_ascii = (
    ".github/scripts/spellcheck.words",
    ".mailmap",
    "RELEASE-NOTES",
    "docs/BINDINGS.md",
    "docs/CIPHERS.md",
    "docs/THANKS",
    "docs/THANKS-filter",
    "tests/libtest/lib1560.c",
    "^tests/data/test",
);
sub fn_match { sub fn_match {
my ($filename, @masklist) = @_; my ($filename, @masklist) = @_;
@ -134,6 +159,13 @@ while(my $filename = <$git_ls_files>) {
push @err, "content: has binary contents"; push @err, "content: has binary contents";
} }
# Flag non-ASCII bytes, except in files that are allowed to carry them.
# The allowed UTF-8 sequences are removed from a scratch copy first, each as
# a whole multi-byte sequence. Putting them into a [...] class would instead
# permit every listed byte on its own (so e.g. "\xC3\xB1" would slip through,
# as both bytes occur in the list) and would leave $content itself modified.
if(!fn_match($filename, @non_ascii)) {
    # Entries are regex escapes such as '\xC3\xA1'; joined with '|' they
    # form an alternation of complete byte sequences.
    my $allowed_seq = join('|', @non_ascii_allowed);
    (my $remaining = $content) =~ s/(?:$allowed_seq)//g;
    if($remaining =~ /([\x80-\xff]+)/) {
        push @err, "content: has non-ASCII: '$1'";
    }
}
if(@err) { if(@err) {
$issues++; $issues++;
foreach my $err (@err) { foreach my $err (@err) {

View file

@ -2030,9 +2030,9 @@ function(curl_transform_makefile_inc _input_file _output_file)
string(REPLACE "$(top_srcdir)" "\${PROJECT_SOURCE_DIR}" _makefile_inc_text ${_makefile_inc_text}) string(REPLACE "$(top_srcdir)" "\${PROJECT_SOURCE_DIR}" _makefile_inc_text ${_makefile_inc_text})
string(REPLACE "$(top_builddir)" "\${PROJECT_BINARY_DIR}" _makefile_inc_text ${_makefile_inc_text}) string(REPLACE "$(top_builddir)" "\${PROJECT_BINARY_DIR}" _makefile_inc_text ${_makefile_inc_text})
string(REGEX REPLACE "\\\\\n" "!π!α!" _makefile_inc_text ${_makefile_inc_text}) string(REGEX REPLACE "\\\\\n" "!^!^!" _makefile_inc_text ${_makefile_inc_text})
string(REGEX REPLACE "([a-zA-Z_][a-zA-Z0-9_]*)[\t ]*=[\t ]*([^\n]*)" "set(\\1 \\2)" _makefile_inc_text ${_makefile_inc_text}) string(REGEX REPLACE "([a-zA-Z_][a-zA-Z0-9_]*)[\t ]*=[\t ]*([^\n]*)" "set(\\1 \\2)" _makefile_inc_text ${_makefile_inc_text})
string(REPLACE "!π!α!" "\n" _makefile_inc_text ${_makefile_inc_text}) string(REPLACE "!^!^!" "\n" _makefile_inc_text ${_makefile_inc_text})
# Replace $() with ${} # Replace $() with ${}
string(REGEX REPLACE "\\$\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)" "\${\\1}" _makefile_inc_text ${_makefile_inc_text}) string(REGEX REPLACE "\\$\\(([a-zA-Z_][a-zA-Z0-9_]*)\\)" "\${\\1}" _makefile_inc_text ${_makefile_inc_text})

View file

@ -810,7 +810,7 @@ struct Curl_addrinfo *Curl_async_getaddrinfo(struct Curl_easy *data,
} }
/* Set what DNS server ares is to use. This is called in 2 situations: /* Set what DNS server ares is to use. This is called in 2 situations:
* 1. when the application does `CURLOPT_DNS_SERVERS´ and passing NULL * 1. when the application does 'CURLOPT_DNS_SERVERS' and passing NULL
* means any previous set value should be unset. Which means * means any previous set value should be unset. Which means
* we need to destroy and create the ares channel anew, if there is one. * we need to destroy and create the ares channel anew, if there is one.
* 2. When we lazy init the ares channel and NULL means that there * 2. When we lazy init the ares channel and NULL means that there

View file

@ -302,11 +302,11 @@ curl_off_t our_getpid(void)
pid = (curl_off_t)curlx_getpid(); pid = (curl_off_t)curlx_getpid();
#ifdef _WIN32 #ifdef _WIN32
/* store pid + MAX_PID to avoid conflict with Cygwin/msys PIDs, see also: /* store pid + MAX_PID to avoid conflict with Cygwin/msys PIDs, see also:
* - 2019-01-31: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; * - 2019-01-31: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
* h=b5e1003722cb14235c4f166be72c09acdffc62ea * h=b5e1003722cb14235c4f166be72c09acdffc62ea
* - 2019-02-02: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; * - 2019-02-02: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
* h=448cf5aa4b429d5a9cebf92a0da4ab4b5b6d23fe * h=448cf5aa4b429d5a9cebf92a0da4ab4b5b6d23fe
* - 2024-12-19: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit; * - 2024-12-19: https://cygwin.com/git/?p=newlib-cygwin.git;a=commit;
* h=363357c023ce01e936bdaedf0f479292a8fa4e0f * h=363357c023ce01e936bdaedf0f479292a8fa4e0f
*/ */
pid += 4194304; pid += 4194304;