urlapi: simplify and split into sub functions

Closes #17565
This commit is contained in:
Daniel Stenberg 2025-06-09 17:22:28 +02:00
parent 3c26e6a896
commit f9d8ed63ed
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2

View file

@ -1320,18 +1320,221 @@ fail:
return NULL;
}
#ifndef USE_IDN
#define host_decode(x,y) CURLUE_LACKS_IDN
#define host_encode(x,y) CURLUE_LACKS_IDN
#else
/*
 * host_decode() passes 'host' through Curl_idn_decode() and leaves the
 * converted name in '*allochost'. It is the conversion used when the
 * CURLU_PUNYCODE flag is set and the stored hostname is not plain ASCII.
 *
 * The CURLcode outcome is narrowed to a CURLUcode: success becomes CURLUE_OK,
 * CURLE_OUT_OF_MEMORY becomes CURLUE_OUT_OF_MEMORY and every other failure is
 * reported as CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_decode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_decode(host, allochost);

  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  /* any other conversion problem is blamed on the hostname */
  return CURLUE_BAD_HOSTNAME;
}
/*
 * host_encode() passes 'host' through Curl_idn_encode() and leaves the
 * converted name in '*allochost'. It is the conversion used when the
 * CURLU_PUNY2IDN flag is set and the stored hostname is plain ASCII.
 *
 * The CURLcode outcome is narrowed to a CURLUcode: success becomes CURLUE_OK,
 * CURLE_OUT_OF_MEMORY becomes CURLUE_OUT_OF_MEMORY and every other failure is
 * reported as CURLUE_BAD_HOSTNAME.
 */
static CURLUcode host_encode(const char *host, char **allochost)
{
  CURLcode rc = Curl_idn_encode(host, allochost);

  if(!rc)
    return CURLUE_OK;
  if(rc == CURLE_OUT_OF_MEMORY)
    return CURLUE_OUT_OF_MEMORY;
  /* any other conversion problem is blamed on the hostname */
  return CURLUE_BAD_HOSTNAME;
}
#endif
/*
 * urlget_format() makes a heap copy of the component string 'ptr' and applies
 * the conversions selected by 'flags' before handing it back in '*part'.
 *
 * u          - URL handle; only u->host is read here, to decide whether a
 *              hostname conversion is needed
 * what       - the part being extracted; CURLUPART_QUERY selects query-style
 *              URL encoding and only CURLUPART_HOST enables the
 *              CURLU_PUNYCODE / CURLU_PUNY2IDN conversions
 * ptr        - the null-terminated component to format
 * part       - receives the allocated result on success
 * plusdecode - when TRUE, every '+' in the copy is turned into a space
 * flags      - CURLU_* bits; CURLU_URLDECODE, CURLU_URLENCODE,
 *              CURLU_PUNYCODE and CURLU_PUNY2IDN are inspected
 *
 * Processing order: '+' conversion, URL decoding, then at most one of URL
 * encoding, CURLU_PUNYCODE conversion or CURLU_PUNY2IDN conversion.
 *
 * Returns CURLUE_OK on success. On every failure the intermediate buffer is
 * released and '*part' is left as NULL, so an error return never hands the
 * caller memory to leak.
 */
static CURLUcode urlget_format(const CURLU *u, CURLUPart what,
                               const char *ptr, char **part,
                               bool plusdecode, unsigned int flags)
{
  size_t partlen = strlen(ptr);
  bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
  bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
  bool punycode = (flags & CURLU_PUNYCODE) && (what == CURLUPART_HOST);
  bool depunyfy = (flags & CURLU_PUNY2IDN) && (what == CURLUPART_HOST);
  CURLUcode uc;
  /* work on a local pointer and only publish it once everything succeeded */
  char *out = Curl_memdup0(ptr, partlen);

  *part = NULL;
  if(!out)
    return CURLUE_OUT_OF_MEMORY;

  if(plusdecode) {
    /* convert + to space */
    char *plus = out;
    size_t i;
    for(i = 0; i < partlen; ++plus, i++) {
      if(*plus == '+')
        *plus = ' ';
    }
  }

  if(urldecode) {
    char *decoded;
    size_t dlen;
    /* this unconditional rejection of control bytes is documented
       API behavior */
    CURLcode res = Curl_urldecode(out, 0, &decoded, &dlen, REJECT_CTRL);
    free(out);
    if(res)
      return CURLUE_URLDECODE;
    out = decoded;
    partlen = dlen;
  }

  if(urlencode) {
    struct dynbuf enc;
    curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
    uc = urlencode_str(&enc, out, partlen, TRUE, what == CURLUPART_QUERY);
    /* the input copy is no longer needed, whether encoding worked or not */
    free(out);
    if(uc)
      return uc;
    out = curlx_dyn_ptr(&enc);
  }
  else if(punycode) {
    if(!Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      uc = host_decode(out, &allochost);
      free(out);
      if(uc)
        return uc;
      out = allochost;
    }
  }
  else if(depunyfy) {
    if(Curl_is_ASCII_name(u->host)) {
      char *allochost = NULL;
      uc = host_encode(out, &allochost);
      free(out);
      if(uc)
        return uc;
      out = allochost;
    }
  }

  *part = out;
  return CURLUE_OK;
}
static CURLUcode urlget_url(const CURLU *u, char **part, unsigned int flags)
{
char *url;
const char *scheme;
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
bool show_fragment =
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
bool show_query = (u->query && u->query[0]) ||
(u->query_present && flags & CURLU_GET_EMPTY);
bool punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
bool depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
char portbuf[7];
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s%s%s",
u->path,
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
}
else if(!u->host)
return CURLUE_NO_HOST;
else {
const struct Curl_handler *h = NULL;
char schemebuf[MAX_SCHEME_LEN + 5];
if(u->scheme)
scheme = u->scheme;
else if(flags & CURLU_DEFAULT_SCHEME)
scheme = DEFAULT_SCHEME;
else
return CURLUE_NO_SCHEME;
h = Curl_get_scheme_handler(scheme);
if(!port && (flags & CURLU_DEFAULT_PORT)) {
/* there is no stored port number, but asked to deliver
a default one for the scheme */
if(h) {
msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
port = portbuf;
}
}
else if(port) {
/* there is a stored port number, but asked to inhibit if it matches
the default one for the scheme */
if(h && (h->defport == u->portnum) &&
(flags & CURLU_NO_DEFAULT_PORT))
port = NULL;
}
if(h && !(h->flags & PROTOPT_URLOPTIONS))
options = NULL;
if(u->host[0] == '[') {
if(u->zoneid) {
/* make it '[ host %25 zoneid ]' */
struct dynbuf enc;
size_t hostlen = strlen(u->host);
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
u->zoneid))
return CURLUE_OUT_OF_MEMORY;
allochost = curlx_dyn_ptr(&enc);
}
}
else if(urlencode) {
allochost = curl_easy_escape(NULL, u->host, 0);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
}
else if(punycode) {
if(!Curl_is_ASCII_name(u->host)) {
CURLUcode ret = host_decode(u->host, &allochost);
if(ret)
return ret;
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host)) {
CURLUcode ret = host_encode(u->host, &allochost);
if(ret)
return ret;
}
}
if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
else
schemebuf[0] = 0;
url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
schemebuf,
u->user ? u->user : "",
u->password ? ":": "",
u->password ? u->password : "",
options ? ";" : "",
options ? options : "",
(u->user || u->password || options) ? "@": "",
allochost ? allochost : u->host,
port ? ":": "",
port ? port : "",
u->path ? u->path : "/",
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
free(allochost);
}
if(!url)
return CURLUE_OUT_OF_MEMORY;
*part = url;
return CURLUE_OK;
}
CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
char **part, unsigned int flags)
{
const char *ptr;
CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
char portbuf[7];
bool urldecode = (flags & CURLU_URLDECODE) ? 1 : 0;
bool urlencode = (flags & CURLU_URLENCODE) ? 1 : 0;
bool punycode = FALSE;
bool depunyfy = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
return CURLUE_BAD_HANDLE;
if(!part)
@ -1342,7 +1545,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
case CURLUPART_SCHEME:
ptr = u->scheme;
ifmissing = CURLUE_NO_SCHEME;
urldecode = FALSE; /* never for schemes */
flags &= ~CURLU_URLDECODE; /* never for schemes */
if((flags & CURLU_NO_GUESS_SCHEME) && u->guessed_scheme)
return CURLUE_NO_SCHEME;
break;
@ -1361,8 +1564,6 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
case CURLUPART_HOST:
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
@ -1371,7 +1572,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
case CURLUPART_PORT:
ptr = u->port;
ifmissing = CURLUE_NO_PORT;
urldecode = FALSE; /* never for port */
flags &= ~CURLU_URLDECODE; /* never for port */
if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
/* there is no stored port number, but asked to deliver
a default one for the scheme */
@ -1398,7 +1599,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
case CURLUPART_QUERY:
ptr = u->query;
ifmissing = CURLUE_NO_QUERY;
plusdecode = urldecode;
plusdecode = flags & CURLU_URLDECODE;
if(ptr && !ptr[0] && !(flags & CURLU_GET_EMPTY))
/* there was a blank query and the user do not ask for it */
ptr = NULL;
@ -1410,219 +1611,31 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
/* there was a blank fragment and the user asks for it */
ptr = "";
break;
case CURLUPART_URL: {
char *url;
const char *scheme;
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
bool show_fragment =
u->fragment || (u->fragment_present && flags & CURLU_GET_EMPTY);
bool show_query =
(u->query && u->query[0]) ||
(u->query_present && flags & CURLU_GET_EMPTY);
punycode = (flags & CURLU_PUNYCODE) ? 1 : 0;
depunyfy = (flags & CURLU_PUNY2IDN) ? 1 : 0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s%s%s",
u->path,
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
}
else if(!u->host)
return CURLUE_NO_HOST;
else {
const struct Curl_handler *h = NULL;
char schemebuf[MAX_SCHEME_LEN + 5];
if(u->scheme)
scheme = u->scheme;
else if(flags & CURLU_DEFAULT_SCHEME)
scheme = DEFAULT_SCHEME;
else
return CURLUE_NO_SCHEME;
h = Curl_get_scheme_handler(scheme);
if(!port && (flags & CURLU_DEFAULT_PORT)) {
/* there is no stored port number, but asked to deliver
a default one for the scheme */
if(h) {
msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
port = portbuf;
}
}
else if(port) {
/* there is a stored port number, but asked to inhibit if it matches
the default one for the scheme */
if(h && (h->defport == u->portnum) &&
(flags & CURLU_NO_DEFAULT_PORT))
port = NULL;
}
if(h && !(h->flags & PROTOPT_URLOPTIONS))
options = NULL;
if(u->host[0] == '[') {
if(u->zoneid) {
/* make it '[ host %25 zoneid ]' */
struct dynbuf enc;
size_t hostlen = strlen(u->host);
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
if(curlx_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
u->zoneid))
return CURLUE_OUT_OF_MEMORY;
allochost = curlx_dyn_ptr(&enc);
}
}
else if(urlencode) {
allochost = curl_easy_escape(NULL, u->host, 0);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
}
else if(punycode) {
if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
CURLcode result = Curl_idn_decode(u->host, &allochost);
if(result)
return (result == CURLE_OUT_OF_MEMORY) ?
CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
CURLcode result = Curl_idn_encode(u->host, &allochost);
if(result)
/* this is the most likely error */
return (result == CURLE_OUT_OF_MEMORY) ?
CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
#endif
}
}
if(!(flags & CURLU_NO_GUESS_SCHEME) || !u->guessed_scheme)
msnprintf(schemebuf, sizeof(schemebuf), "%s://", scheme);
else
schemebuf[0] = 0;
url = aprintf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
schemebuf,
u->user ? u->user : "",
u->password ? ":": "",
u->password ? u->password : "",
options ? ";" : "",
options ? options : "",
(u->user || u->password || options) ? "@": "",
allochost ? allochost : u->host,
port ? ":": "",
port ? port : "",
u->path ? u->path : "/",
show_query ? "?": "",
u->query ? u->query : "",
show_fragment ? "#": "",
u->fragment ? u->fragment : "");
free(allochost);
}
if(!url)
return CURLUE_OUT_OF_MEMORY;
*part = url;
return CURLUE_OK;
}
case CURLUPART_URL:
return urlget_url(u, part, flags);
default:
ptr = NULL;
break;
}
if(ptr) {
size_t partlen = strlen(ptr);
size_t i = 0;
*part = Curl_memdup0(ptr, partlen);
if(!*part)
return CURLUE_OUT_OF_MEMORY;
if(plusdecode) {
/* convert + to space */
char *plus = *part;
for(i = 0; i < partlen; ++plus, i++) {
if(*plus == '+')
*plus = ' ';
}
}
if(urldecode) {
char *decoded;
size_t dlen;
/* this unconditional rejection of control bytes is documented
API behavior */
CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
free(*part);
if(res) {
*part = NULL;
return CURLUE_URLDECODE;
}
*part = decoded;
partlen = dlen;
}
if(urlencode) {
struct dynbuf enc;
CURLUcode uc;
curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
if(uc)
return uc;
free(*part);
*part = curlx_dyn_ptr(&enc);
}
else if(punycode) {
if(!Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
char *allochost;
CURLcode result = Curl_idn_decode(*part, &allochost);
if(result)
return (result == CURLE_OUT_OF_MEMORY) ?
CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
free(*part);
*part = allochost;
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
char *allochost;
CURLcode result = Curl_idn_encode(*part, &allochost);
if(result)
return (result == CURLE_OUT_OF_MEMORY) ?
CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
free(*part);
*part = allochost;
#endif
}
}
if(ptr)
return urlget_format(u, what, ptr, part, plusdecode, flags);
return CURLUE_OK;
}
else
return ifmissing;
return ifmissing;
}
static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
unsigned int flags)
unsigned int flags)
{
size_t plen = strlen(scheme);
size_t plen = strlen(scheme);
const struct Curl_handler *h = NULL;
if((plen > MAX_SCHEME_LEN) || (plen < 1))
/* too long or too short */
return CURLUE_BAD_SCHEME;
/* verify that it is a fine scheme */
h = Curl_get_scheme_handler(scheme);
if(!h) {
const char *s = scheme;
if((plen > MAX_SCHEME_LEN) || (plen < 1))
/* too long or too short */
return CURLUE_BAD_SCHEME;
/* verify that it is a fine scheme */
if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(scheme))
if(!(flags & CURLU_NON_SUPPORT_SCHEME))
return CURLUE_UNSUPPORTED_SCHEME;
if(ISALPHA(*s)) {
/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
@ -1635,8 +1648,9 @@ static CURLUcode set_url_scheme(CURLU *u, const char *scheme,
}
else
return CURLUE_BAD_SCHEME;
u->guessed_scheme = FALSE;
return CURLUE_OK;
}
u->guessed_scheme = FALSE;
return CURLUE_OK;
}
static CURLUcode set_url_port(CURLU *u, const char *provided_port)
@ -1659,7 +1673,7 @@ static CURLUcode set_url_port(CURLU *u, const char *provided_port)
}
static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
unsigned int flags)
unsigned int flags)
{
/*
* Allow a new URL to replace the existing (if any) contents.
@ -1696,6 +1710,53 @@ static CURLUcode set_url(CURLU *u, const char *url, size_t part_size,
return uc;
}
/*
 * urlset_clear() removes one component from the URL handle; curl_url_set()
 * calls it when asked to set a part to NULL.
 *
 * CURLUPART_URL wipes the whole handle. Every other known part frees its
 * stored string, sets the pointer to NULL and resets the state kept alongside
 * it (guessed_scheme, portnum, query_present, fragment_present).
 *
 * Returns CURLUE_OK, or CURLUE_UNKNOWN_PART for an unrecognized 'what'.
 */
static CURLUcode urlset_clear(CURLU *u, CURLUPart what)
{
  char **field = NULL;

  switch(what) {
  case CURLUPART_URL:
    /* drop everything and start over with an all-zero handle */
    free_urlhandle(u);
    memset(u, 0, sizeof(struct Curl_URL));
    return CURLUE_OK;
  case CURLUPART_SCHEME:
    u->guessed_scheme = FALSE;
    field = &u->scheme;
    break;
  case CURLUPART_USER:
    field = &u->user;
    break;
  case CURLUPART_PASSWORD:
    field = &u->password;
    break;
  case CURLUPART_OPTIONS:
    field = &u->options;
    break;
  case CURLUPART_HOST:
    field = &u->host;
    break;
  case CURLUPART_ZONEID:
    field = &u->zoneid;
    break;
  case CURLUPART_PORT:
    u->portnum = 0;
    field = &u->port;
    break;
  case CURLUPART_PATH:
    field = &u->path;
    break;
  case CURLUPART_QUERY:
    u->query_present = FALSE;
    field = &u->query;
    break;
  case CURLUPART_FRAGMENT:
    u->fragment_present = FALSE;
    field = &u->fragment;
    break;
  default:
    return CURLUE_UNKNOWN_PART;
  }

  /* every remaining case selected exactly one string to release */
  Curl_safefree(*field);
  return CURLUE_OK;
}
CURLUcode curl_url_set(CURLU *u, CURLUPart what,
const char *part, unsigned int flags)
{
@ -1710,57 +1771,9 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
if(!u)
return CURLUE_BAD_HANDLE;
if(!part) {
if(!part)
/* setting a part to NULL clears it */
switch(what) {
case CURLUPART_URL:
break;
case CURLUPART_SCHEME:
storep = &u->scheme;
u->guessed_scheme = FALSE;
break;
case CURLUPART_USER:
storep = &u->user;
break;
case CURLUPART_PASSWORD:
storep = &u->password;
break;
case CURLUPART_OPTIONS:
storep = &u->options;
break;
case CURLUPART_HOST:
storep = &u->host;
break;
case CURLUPART_ZONEID:
storep = &u->zoneid;
break;
case CURLUPART_PORT:
u->portnum = 0;
storep = &u->port;
break;
case CURLUPART_PATH:
storep = &u->path;
break;
case CURLUPART_QUERY:
storep = &u->query;
u->query_present = FALSE;
break;
case CURLUPART_FRAGMENT:
storep = &u->fragment;
u->fragment_present = FALSE;
break;
default:
return CURLUE_UNKNOWN_PART;
}
if(storep && *storep) {
Curl_safefree(*storep);
}
else if(!storep) {
free_urlhandle(u);
memset(u, 0, sizeof(struct Curl_URL));
}
return CURLUE_OK;
}
return urlset_clear(u, what);
nalloc = strlen(part);
if(nalloc > CURL_MAX_INPUT_LENGTH)
@ -1810,9 +1823,8 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
storep = &u->fragment;
u->fragment_present = TRUE;
break;
case CURLUPART_URL: {
case CURLUPART_URL:
return set_url(u, part, nalloc, flags);
}
default:
return CURLUE_UNKNOWN_PART;
}