mirror of
https://github.com/curl/curl.git
synced 2026-04-14 22:11:45 +03:00
badwords: combine the whitelisting into a single regex
Also: make the whitelist matches case insensitive. Takes the script execution time down from 3.6 seconds to 1.1 on my machine. Closes #20880
This commit is contained in:
parent
8c908d2d0a
commit
2e52a57107
1 changed file with 15 additions and 11 deletions
|
|
@ -20,8 +20,11 @@ use warnings;
|
|||
my @whitelist = (
|
||||
# ignore what looks like URLs
|
||||
'(^|\W)((https|http|ftp):\/\/[a-z0-9\-._~%:\/?\#\[\]\@!\$&\'\(\)*+,;=]+)',
|
||||
# ignore bolded sections
|
||||
'\*\*(.*?)\*\*');
|
||||
# remove bolded sections
|
||||
'\*\*.*?\*\*',
|
||||
# remove backticked texts
|
||||
'\`.*?\`'
|
||||
);
|
||||
my %alt;
|
||||
my %exactcase;
|
||||
my $skip_indented = 1;
|
||||
|
|
@ -55,7 +58,7 @@ while(<STDIN>) {
|
|||
if($_ =~ /^#/) {
|
||||
next;
|
||||
}
|
||||
if($_ =~ /^---(.*)/) {
|
||||
if($_ =~ /^---(.+)/) {
|
||||
push @whitelist, $1;
|
||||
}
|
||||
elsif($_ =~ /^(.*)([:=])(.*)/) {
|
||||
|
|
@ -85,6 +88,11 @@ if(@exact) {
|
|||
$re_cs = qr/\b($pat)\b/;
|
||||
}
|
||||
|
||||
# Build a single combined regex for removing whitelisted content
|
||||
my $re_wl;
|
||||
my $pat = join('|', map { $_ } @whitelist);
|
||||
$re_wl = qr/($pat)/;
|
||||
|
||||
my $errors = 0;
|
||||
|
||||
sub highlight {
|
||||
|
|
@ -123,24 +131,20 @@ sub file {
|
|||
}
|
||||
# remove the link part
|
||||
$in =~ s/(\[.*\])\(.*\)/$1/g;
|
||||
# remove backticked texts
|
||||
$in =~ s/\`.*\`//g;
|
||||
# remove whitelisted patterns (pre-compiled)
|
||||
for my $p (@whitelist) {
|
||||
$in =~ s/$p//g;
|
||||
if($re_wl) {
|
||||
$in =~ s/${re_wl}//ig;
|
||||
}
|
||||
# case-insensitive bad words
|
||||
if($re_ci) {
|
||||
while($in =~ /^(.*)$re_ci/i) {
|
||||
if($in =~ /^(.*)$re_ci/i) {
|
||||
highlight($1, $2, $in, $f, $l, lc($2));
|
||||
last;
|
||||
}
|
||||
}
|
||||
# case-sensitive (exact) bad words
|
||||
if($re_cs) {
|
||||
while($in =~ /^(.*)$re_cs/) {
|
||||
if($in =~ /^(.*)$re_cs/) {
|
||||
highlight($1, $2, $in, $f, $l, $2);
|
||||
last;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue