badwords: rework exceptions, fix many of them

Also:
- support per-directory and per-upper-directory whitelist entries.
- convert badlist input grep tweak into the above format.
  (except for 'And' which had just a few hits.)
- fix many code exceptions, but do not enforce.
  (there also remain about 350 'will' uses in lib)
- fix badwords in example code, drop exceptions.
- badwords-all: convert to Perl.
  To make it usable from CMake.
- FAQ: reword to not use 'will'. Drop exception.

Closes #20886
This commit is contained in:
Viktor Szakats 2026-03-11 10:17:10 +01:00
parent 11c14b5ca5
commit 435eabeac8
No known key found for this signature in database
89 changed files with 367 additions and 344 deletions

View file

@ -17,6 +17,8 @@
use strict;
use warnings;
use File::Basename;
my @whitelist = (
# ignore what looks like URLs
'(^|\W)((https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&\'\(\)*+,;=]+)',
@ -99,16 +101,32 @@ sub highlight {
my ($p, $w, $in, $f, $l, $lookup) = @_;
my $c = length($p)+1;
my $ch = "$f:$l:$w";
my $ch;
my $dir = dirname($f);
$ch = $dir . "/" . "::" . $w;
if($wl{$ch}) {
# whitelisted filename + line + word
# whitelisted dirname + word
return;
}
my $updir = dirname($dir);
if($dir ne $updir) {
$ch = $updir . "/" . "::" . $w;
if($wl{$ch}) {
# whitelisted upper dirname + word
return;
}
}
$ch = $f . "::" . $w;
if($wl{$ch}) {
# whitelisted filename + word
return;
}
$ch = "$f:$l:$w";
if($wl{$ch}) {
# whitelisted filename + line + word
return;
}
print STDERR "$f:$l:$c: error: found bad word \"$w\"\n";
printf STDERR " %4d | %s\n", $l, $in;

View file

@ -1,12 +1,14 @@
#!/bin/sh
#!/usr/bin/env perl
# Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
#
# SPDX-License-Identifier: curl
set -eu
use strict;
use warnings;
cd "$(dirname "${0}")"/..
use File::Basename;
# we allow some extra in source code
grep -Ev '^(will:|But=|So=|And=| url=)' scripts/badwords.txt | scripts/badwords -a src lib include docs/examples
scripts/badwords -w scripts/badwords.ok '**.md' projects/OS400/README.OS400 < scripts/badwords.txt
# Run from the parent of this script's directory, so the relative
# 'scripts/...' paths and the scanned directories below resolve.
chdir(dirname(__FILE__) . "/..")
    or die "badwords-all: cannot chdir to " . dirname(__FILE__) . "/..: $!\n";

# Run both checks even when the first one reports problems, so that a single
# invocation lists every finding, then pass the combined result on through the
# exit code. system() returns non-zero both when the command cannot be started
# and when it exits with a failure status.
my $failed = 0;

# source code and examples
$failed = 1
    if system("scripts/badwords -a -w scripts/badwords.ok src lib include docs/examples < scripts/badwords.txt") != 0;

# documentation
$failed = 1
    if system("scripts/badwords -w scripts/badwords.ok '**.md' projects/OS400/README.OS400 < scripts/badwords.txt") != 0;

exit $failed;

View file

@ -4,5 +4,12 @@
#
# whitelisted uses of bad words
# file:[line]:rule
# dir/::rule (applies to files in dir and in its direct subdirectories)
docs/FAQ.md::will
docs/FAQ.md::Will
lib/urldata.h:: url
include/curl/::will
lib/::But
lib/::So
lib/::will
lib/::Will
lib/::WILL
src/::will
src/::Will