tool_getparam: warn on more unicode prefixes

If a string argument is expected and the first two bytes are 0xe2 0x80
and the third has the high bit (0x80) set, that's enough for curl to warn.

Previously we tried to detect and warn only for the unicode double
quote, but users might use single quotes, other quotes or even lead the
argument with one of the "zero width" characters. This is an attempt to
detect many of those without triggering for "normal" IDN hostnames.

Closes #18459
This commit is contained in:
Daniel Stenberg 2025-09-03 09:52:36 +02:00
parent 4c70b71ba4
commit 765b5ab0cd
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2
3 changed files with 13 additions and 8 deletions

View file

@ -2808,6 +2808,12 @@ static ParameterError opt_filestring(struct OperationConfig *config,
return err;
}
/*
 * Check whether 'arg' begins with the byte sequence e2 80 XX, where XX is
 * any byte in the range 80 - ff.
 *
 * Bytes are inspected strictly in order and inspection stops at the first
 * mismatch, so arg[1] is only read when arg[0] matched and arg[2] is only
 * read when arg[1] matched.
 *
 * Returns true when all three leading bytes match, false otherwise.
 */
static bool has_leading_unicode(const unsigned char *arg)
{
  if(arg[0] != 0xe2)
    return false;
  if(arg[1] != 0x80)
    return false;
  /* the third byte qualifies when its top bit (0x80) is set */
  return (arg[2] & 0x80) != 0;
}
/* the longest command line option, excluding the leading -- */
#define MAX_OPTION_LEN 26
@ -2947,10 +2953,9 @@ ParameterError getparameter(const char *flag, /* f or -long-flag */
warnf("The filename argument '%s' looks like a flag.",
nextarg);
}
else if(!strncmp("\xe2\x80\x9c", nextarg, 3)) {
warnf("The argument '%s' starts with a Unicode quote where "
"maybe an ASCII \" was intended?",
nextarg);
else if(has_leading_unicode((const unsigned char *)nextarg)) {
warnf("The argument '%s' starts with a Unicode character. "
"Maybe ASCII was intended?", nextarg);
}
/* ARG_FILE | ARG_STRG */
err = opt_filestring(config, a, nextarg);

View file

@ -45,8 +45,8 @@ warn about Unicode quote character
# Verify data after the test has been "shot"
<verify>
<stderr>
%hex[Warning: The argument '%e2%80%9chost:' starts with a Unicode quote where maybe an ]hex%
Warning: ASCII " was intended?
%hex[Warning: The argument '%e2%80%9chost:' starts with a Unicode character. Maybe ASCII ]hex%
Warning: was intended?
</stderr>
</verify>
</testcase>

View file

@ -45,8 +45,8 @@ warn about Unicode quote character read from config file
# Verify data after the test has been "shot"
<verify>
<stderr mode="text">
%hex[Warning: The argument '%e2%80%9chost:fake%e2%80%9d' starts with a Unicode quote where ]hex%
Warning: maybe an ASCII " was intended?
%hex[Warning: The argument '%e2%80%9chost:fake%e2%80%9d' starts with a Unicode character. Maybe ]hex%
Warning: ASCII was intended?
</stderr>
</verify>
</testcase>