badwords: re-sync with curl-www, fix issues found

Also:
- replace `manpage` with `man page`, add to `badwords.txt`.
- badwords.pl: import `-w` feature from curl-www, syncing the two
  scripts fully.
- badwords.txt: import missing items from curl-www, syncing the two
  files fully.
- pyspelling.words: drop `cURL` allowed word.

Closes #19468
This commit is contained in:
Viktor Szakats 2025-11-11 17:43:06 +01:00
parent ebc5fea64d
commit 4841e4290d
No known key found for this signature in database
GPG key ID: B5ABD165E2AEF201
27 changed files with 109 additions and 78 deletions

View file

@ -21,6 +21,24 @@ my @whitelist;
my %alt;
my %exactcase;
my %wl;
# Optional "-w <file>" argument: load a whitelist of accepted findings.
# Each non-comment line of the form "<file>:<line>:<word>" is stored as a
# key in %wl; lines that do not match that form are ignored.
if(@ARGV && $ARGV[0] eq "-w") {
    shift @ARGV;
    my $file = shift @ARGV;
    defined($file) or die "-w requires a whitelist filename\n";
    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as an open mode, and an unreadable whitelist is a hard
    # error instead of silently behaving like an empty one.
    open(my $wlfh, '<', $file) or die "cannot open whitelist $file: $!\n";
    while(my $entry = <$wlfh>) {
        if($entry =~ /^#/) {
            # allow #-comments
            next;
        }
        # '.' does not match the trailing newline, so the stored key never
        # contains it
        if($entry =~ /^([^:]*):(\d+):(.*)/) {
            $wl{"$1:$2:$3"}=1;
            #print STDERR "whitelisted $1:$2:$3\n";
        }
    }
    close($wlfh);
}
my @w;
while(<STDIN>) {
chomp;
@ -30,7 +48,7 @@ while(<STDIN>) {
if($_ =~ /^---(.*)/) {
push @whitelist, $1;
}
elsif($_ =~ /^([^:=]*)([:=])(.*)/) {
elsif($_ =~ /^(.*)([:=])(.*)/) {
my ($bad, $sep, $better)=($1, $2, $3);
push @w, $bad;
$alt{$bad} = $better;
@ -67,6 +85,14 @@ sub file {
($in =~ /^(.*)$w/ && $case) ) {
my $p = $1;
my $c = length($p)+1;
my $ch = "$f:$l:$w";
if($wl{$ch}) {
# whitelisted
print STDERR "$ch found but whitelisted\n";
next;
}
print STDERR "$f:$l:$c: error: found bad word \"$w\"\n";
printf STDERR " %4d | $in\n", $l;
printf STDERR " | %*s^%s\n", length($p), " ",

View file

@ -3,7 +3,7 @@
# SPDX-License-Identifier: curl
#
back-end:backend
e-mail:email
\be-mail[^/]:email
run-time:runtime
set-up:setup
tool chain:toolchain
@ -36,7 +36,7 @@ aren't:are not
a IPv4: an IPv4
a IPv6: an IPv6
url =URL
internet\b=Internet
[^/]internet\b=Internet
isation:ization
\bit's:it is
it'd:it would
@ -47,12 +47,14 @@ there's:there is
\. So : Rewrite without "so" ?
dir :directory
sub-director:subdirector
you'd:you would
you'll:you will
can't:cannot
that's:that is
web page:webpage
host name\b:hostname
host names\b:hostnames
file name\b:filename
[^;]file name\b:filename
file names\b:filenames
\buser name\b:username
\buser names\b:usernames
@ -70,7 +72,11 @@ couldn't:could not
32-bits:32 bits or 32-bit
\bvery\b:rephrase using an alternative word
\bCurl\b=curl
\bcURL\b=curl
\bLibcurl\b=libcurl
\bLibCurl\b=libcurl
---WWW::Curl
---NET::Curl
---Curl Corporation
\bmanpages[^./&:-]:man pages
\bmanpage[^si./&:-]:man page

View file

@ -149,7 +149,6 @@ CSeq
csh
cshrc
CTRL
cURL
CURLcode
curldown
CURLE