test1675: unit tests for URL API helper functions

- ipv4_normalize
- urlencode_str
- ipv6_parse
- parse_file

urlapi: make the string URL encoder normalize to uppercase
percent-encoding

Closes #21296
This commit is contained in:
Daniel Stenberg 2026-04-13 12:46:45 +02:00
parent d7a991cc9b
commit 0b4ebebb06
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2
8 changed files with 414 additions and 46 deletions

View file

@ -24,6 +24,30 @@
*
***************************************************************************/
#include "curl_setup.h"
#include <curl/urlapi.h>
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member holds one component of the URL. The definition lives
 * in this internal header so that code outside urlapi.c (such as unit test
 * 1675) can create and inspect the struct directly.
 *
 * NOTE(review): the *_present bits presumably tell an empty component apart
 * from an absent one - confirm against the parser in urlapi.c.
 */
struct Curl_URL {
char *scheme;
char *user;
char *password;
char *options; /* IMAP only? */
char *host;
char *zoneid; /* for numerical IPv6 addresses */
char *port;
char *path;
char *query;
char *fragment;
unsigned short portnum; /* the numerical version (if 'port' is set) */
BIT(query_present); /* to support blank */
BIT(fragment_present); /* to support blank */
BIT(guessed_scheme); /* when a URL without scheme is parsed */
};
/* Host type codes, as returned by ipv4_normalize(). Unit test 1675 compares
 * the return value against HOST_IPV4 to tell a normalized IPv4 address from
 * everything else.
 */
#define HOST_ERROR (-1) /* out of memory */
#define HOST_NAME 1
#define HOST_IPV4 2
#define HOST_IPV6 3
/* Defined outside this header.
 * NOTE(review): the name suggests it checks whether 'url' starts with a
 * scheme and may copy it into 'buf' - confirm the exact meaning of the
 * returned size_t and of 'guess_scheme' in the implementation.
 */
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
bool guess_scheme);

View file

@ -63,24 +63,6 @@
#define AF_INET6 (AF_INET + 1)
#endif
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every 'char *' member holds one component of the URL.
 *
 * NOTE(review): the *_present bits presumably tell an empty component apart
 * from an absent one - confirm against the parser.
 */
struct Curl_URL {
char *scheme;
char *user;
char *password;
char *options; /* IMAP only? */
char *host;
char *zoneid; /* for numerical IPv6 addresses */
char *port;
char *path;
char *query;
char *fragment;
unsigned short portnum; /* the numerical version (if 'port' is set) */
BIT(query_present); /* to support blank */
BIT(fragment_present); /* to support blank */
BIT(guessed_scheme); /* when a URL without scheme is parsed */
};
#define DEFAULT_SCHEME "https"
static void free_urlhandle(struct Curl_URL *u)
@ -124,15 +106,25 @@ static const char *find_host_sep(const char *url)
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
* spaces in the source URL accordingly.
*
* This function re-encodes the string, meaning that it leaves already encoded
* bytes as-is and works by encoding only what *has* to be encoded - unless it
* has to uppercase the hex to normalize.
*
* Illegal percent-encoding sequences are left as-is.
*
* URL encoding should be skipped for hostnames, otherwise IDN resolution
* will fail.
*
* @unittest: 1675
*/
static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
bool query);
UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t len, bool relative,
bool query)
{
/* we must add this with whitespace-replacing */
bool left = !query;
const unsigned char *iptr;
const unsigned char *host_sep = (const unsigned char *)url;
CURLcode result = CURLE_OK;
@ -149,7 +141,7 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
for(iptr = host_sep; len && !result; iptr++, len--) {
if(*iptr == ' ') {
if(left)
if(!query)
result = curlx_dyn_addn(o, "%20", 3);
else
result = curlx_dyn_addn(o, "+", 1);
@ -159,10 +151,21 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
Curl_hexbyte(&out[1], *iptr);
result = curlx_dyn_addn(o, out, 3);
}
else if(*iptr == '%' && ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) &&
(ISLOWER(iptr[1]) || ISLOWER(iptr[2]))) {
/* uppercase it */
unsigned char hex = (unsigned char)((curlx_hexval(iptr[1]) << 4) |
curlx_hexval(iptr[2]));
unsigned char out[3] = { '%' };
Curl_hexbyte(&out[1], hex);
result = curlx_dyn_addn(o, out, 3);
iptr += 2;
len -= 2;
}
else {
result = curlx_dyn_addn(o, iptr, 1);
if(*iptr == '?')
left = FALSE;
query = TRUE;
}
}
@ -388,8 +391,14 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
return CURLUE_OK;
}
/* this assumes 'hostname' now starts with [ */
static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
/* This function assumes 'hostname' now starts with [. It trims 'hostname' in
* place and it sets u->zoneid if present.
*
* @unittest: 1675
*/
UNITTEST CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
size_t hlen);
UNITTEST CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
size_t hlen) /* length of hostname */
{
size_t len;
@ -443,7 +452,9 @@ static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
return CURLUE_OK;
}
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
UNITTEST CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
size_t hlen);
UNITTEST CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
size_t hlen) /* length of hostname */
{
size_t len;
@ -474,15 +485,12 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
* integers.
*
* Returns the host type.
*
* @unittest: 1675
*/
#define HOST_ERROR (-1) /* out of memory */
#define HOST_NAME 1
#define HOST_IPV4 2
#define HOST_IPV6 3
static int ipv4_normalize(struct dynbuf *host)
UNITTEST int ipv4_normalize(struct dynbuf *host);
UNITTEST int ipv4_normalize(struct dynbuf *host)
{
bool done = FALSE;
int n = 0;
@ -820,7 +828,13 @@ end:
return result ? 1 : 0; /* success */
}
static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
/*
* @unittest: 1675
*/
UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
struct dynbuf *host, const char **pathp,
size_t *pathlenp);
UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u,
struct dynbuf *host, const char **pathp,
size_t *pathlenp)
{

View file

@ -223,7 +223,7 @@ test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 \
test1658 test1659 test1660 test1661 test1662 test1663 test1664 test1665 \
test1666 test1667 test1668 test1669 \
\
test1670 test1671 test1672 test1673 test1674 \
test1670 test1671 test1672 test1673 test1674 test1675 \
\
test1680 test1681 test1682 test1683 test1684 test1685 \
\

18
tests/data/test1675 Normal file
View file

@ -0,0 +1,18 @@
<?xml version="1.0" encoding="US-ASCII"?>
<testcase>
<info>
<keywords>
unittest
urlapi
</keywords>
</info>
<client>
<features>
unittest
</features>
<name>
URL API unit tests
</name>
</client>
</testcase>

View file

@ -37,7 +37,7 @@ a few bytes
# Verify data after the test has been "shot"
<verify>
<protocol crlf="headers">
PUT /we/want/%TESTNUMBERte%5b%5dst.txt HTTP/1.1
PUT /we/want/%TESTNUMBERte%5B%5Dst.txt HTTP/1.1
Host: %HOSTIP:%HTTPPORT
User-Agent: curl/%VERSION
Accept: */*

View file

@ -438,11 +438,9 @@ static const struct testcase get_parts_list[] = {
{"file:///C:\\programs\\foo",
"file | [11] | [12] | [13] | [14] | [15] | C:\\programs\\foo | [16] | [17]",
CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
{"file://host.example.com/Share/path/to/file.txt",
"file | [11] | [12] | [13] | host.example.com | [15] | "
"//host.example.com/Share/path/to/file.txt | [16] | [17]",
CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
#endif
{"file://host.example.com/Share/path/to/file.txt", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_BAD_FILE_URL},
{"https://example.com/color/#green?no-red",
"https | [11] | [12] | [13] | example.com | [15] | /color/ | [16] | "
"green?no-red",

View file

@ -42,7 +42,7 @@ TESTS_C = \
unit1650.c unit1651.c unit1652.c unit1653.c unit1654.c unit1655.c unit1656.c \
unit1657.c unit1658.c unit1660.c unit1661.c unit1663.c unit1664.c \
unit1666.c unit1667.c unit1668.c unit1669.c \
unit1674.c \
unit1674.c unit1675.c \
unit1979.c unit1980.c \
unit2600.c unit2601.c unit2602.c unit2603.c unit2604.c unit2605.c \
unit3200.c unit3205.c \

314
tests/unit/unit1675.c Normal file
View file

@ -0,0 +1,314 @@
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
* SPDX-License-Identifier: curl
*
***************************************************************************/
#include "unitcheck.h"
#include "urlapi-int.h"
#include "curlx/dynbuf.h"
/*
 * test_unit1675() exercises four URL API helper functions in turn:
 *
 * - ipv4_normalize(): numerical hostnames in decimal/octal/hex notation are
 *   rewritten to dotted-decimal, everything else must not be reported as
 *   HOST_IPV4.
 * - urlencode_str(): spaces and bytes that need it are percent-encoded,
 *   existing percent-encoding is kept but uppercased, broken sequences are
 *   left alone.
 * - ipv6_parse(): the bracketed hostname is trimmed in place and a zone id
 *   is stored in the URL struct.
 * - parse_file(): file:// URLs yield a path; unsupported hosts are refused.
 *
 * Each section counts its own failures, prints one line per failing case to
 * stderr and aborts the test after its table has been walked completely.
 *
 * 'arg' is unused. Returns CURLE_OUT_OF_MEMORY if a test buffer or handle
 * cannot be allocated, otherwise whatever the UNITTEST_* macros produce.
 */
static CURLcode test_unit1675(const char *arg)
{
  (void)arg;
  UNITTEST_BEGIN_SIMPLE

  /* Test ipv4_normalize */
  {
    struct dynbuf host;
    int fails = 0;
    unsigned int i;
    struct ipv4_test {
      const char *in;
      const char *out; /* NULL when the input must not become an IPv4 */
    };
    const struct ipv4_test tests[] = {
      {"0x.0x.0x.0x", NULL}, /* invalid hex */
      {"0x.0x.0x", NULL}, /* invalid hex */
      {"0x.0x", NULL}, /* invalid hex */
      {"0x", NULL}, /* invalid hex */
      {"0", "0.0.0.0"},
      {"00", "0.0.0.0"},
      {"00000000000", "0.0.0.0"},
      {"127.0.0.1", "127.0.0.1"},
      {"0177.0.0.1", "127.0.0.1"},
      {"00177.0.0.1", "127.0.0.1"},
      {"0x7f.0.0.1", "127.0.0.1"},
      {"0x07f.0.0.1", "127.0.0.1"},
      {"1", "0.0.0.1"},
      {"010", "0.0.0.8"},
      {"001", "0.0.0.1"},
      {"127", "0.0.0.127"},
      {"127.1", "127.0.0.1"},
      {"127.0.1", "127.0.0.1"},
      {"1.16777215", "1.255.255.255"},
      {"1.16777216", NULL}, /* overflow */
      {"1.1.65535", "1.1.255.255"},
      {"1.1.65536", NULL}, /* overflow */
      {"0x7f000001", "127.0.0.1"},
      {"0x7F000001", "127.0.0.1"},
      {"0x7g000001", NULL}, /* bad hex */
      {"2130706433", "127.0.0.1"},
      {"017700000001", "127.0.0.1"},
      {"000000000017700000001", "127.0.0.1"},
      {"192.168.0.1", "192.168.0.1"},
      {"0300.0250.0000.0001", "192.168.0.1"},
      {"0xc0.0xa8.0.1", "192.168.0.1"},
      {"0xc0a80001", "192.168.0.1"},
      {"3232235521", "192.168.0.1"},
      {"4294967294", "255.255.255.254"},
      {"4294967295", "255.255.255.255"},
      {"037777777777", "255.255.255.255"},
      {"0xFFFFFFFF", "255.255.255.255"},
      {"0xFFFFFfff", "255.255.255.255"},
      {"1.2.3.4.5", NULL}, /* too many parts */
      {"256.0.0.1", NULL}, /* overflow */
      {"1.256.0.1", NULL}, /* overflow */
      {"1.1.256.1", NULL}, /* overflow */
      {"1.0.0.256", NULL}, /* overflow */
      {"0x100.0.0.1", NULL}, /* overflow */
      {"1.0x100.0.1", NULL}, /* overflow */
      {"1.1.0x100.1", NULL}, /* overflow */
      {"1.1.1.0x100", NULL}, /* overflow */
      {"0400.0.0.1", NULL}, /* overflow */
      {"4.0400.0.1", NULL}, /* overflow */
      {"4.4.0400.1", NULL}, /* overflow */
      {"4.4.4.0400", NULL}, /* overflow */
      {"4294967296", NULL}, /* overflow */
      {"040000000000", NULL}, /* overflow */
      {"0x100000000", NULL}, /* overflow */
      {"1.2.3.-4", NULL}, /* negative */
      {"1.2.-3.4", NULL}, /* negative */
      {"1.-2.3.4", NULL}, /* negative */
      {"-1.2.3.4", NULL}, /* negative */
      {"-12", NULL}, /* negative */
      {"-12.1", NULL}, /* negative */
      {"-12.2.3", NULL}, /* negative */
      {" 1.2.3.4", NULL}, /* space */
      {"1. 2.3.4", NULL}, /* space */
      {"1.2. 3.4", NULL}, /* space */
      {"1.2.3. 4", NULL}, /* space */
    };

    curlx_dyn_init(&host, 256);
    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
      int rc;
      curlx_dyn_reset(&host);
      if(curlx_dyn_add(&host, tests[i].in)) {
        /* do not leave through the early exit with the buffer still owned */
        curlx_dyn_free(&host);
        return CURLE_OUT_OF_MEMORY;
      }
      rc = ipv4_normalize(&host);
      if(tests[i].out) {
        if((rc != HOST_IPV4) ||
           strcmp(curlx_dyn_ptr(&host), tests[i].out)) {
          curl_mfprintf(stderr, "ipv4_normalize('%s') failed: "
                        "expected '%s', got '%s'\n",
                        tests[i].in, tests[i].out, curlx_dyn_ptr(&host));
          fails++;
        }
      }
      else {
        if(rc == HOST_IPV4) {
          curl_mfprintf(stderr, "ipv4_normalize('%s') succeeded unexpectedly:"
                        " got '%s'\n",
                        tests[i].in, curlx_dyn_ptr(&host));
          fails++;
        }
      }
    }
    curlx_dyn_free(&host);
    abort_if(fails, "ipv4_normalize tests failed");
  }

  /* Test urlencode_str */
  {
    struct dynbuf out;
    int fails = 0;
    unsigned int i;
    struct urlencode_test {
      const char *in;
      bool relative;
      bool query;
      const char *out;
    };
    const struct urlencode_test tests[] = {
      {"http://leave\x01/hello\x01world", FALSE, FALSE,
       "http://leave\x01/hello%01world"},
      {"http://leave/hello\x01world", FALSE, FALSE,
       "http://leave/hello%01world"},
      {"http://le ave/hello\x01world", FALSE, FALSE,
       "http://le ave/hello%01world"},
      {"hello\x01world", TRUE, FALSE, "hello%01world"},
      {"hello\xf0world", TRUE, FALSE, "hello%F0world"},
      {"hello world", TRUE, FALSE, "hello%20world"},
      {"hello%20world", TRUE, FALSE, "hello%20world"},
      {"hello world", TRUE, TRUE, "hello+world"},
      {"a+b c", TRUE, FALSE, "a+b%20c"},
      {"a%20b%20c", TRUE, FALSE, "a%20b%20c"},
      {"a%aab%aac", TRUE, FALSE, "a%AAb%AAc"},
      {"a%aab%AAc", TRUE, FALSE, "a%AAb%AAc"},
      {"w%w%x", TRUE, FALSE, "w%w%x"},
      {"w%wf%xf", TRUE, FALSE, "w%wf%xf"},
      {"w%fw%fw", TRUE, FALSE, "w%fw%fw"},
      {"a+b c", TRUE, TRUE, "a+b+c"},
      {"/foo/bar", TRUE, FALSE, "/foo/bar"},
      {"/foo/bar", TRUE, TRUE, "/foo/bar"},
      {"/foo/ bar", TRUE, FALSE, "/foo/%20bar"},
      {"/foo/ bar", TRUE, TRUE, "/foo/+bar"},
      {"~-._", TRUE, FALSE, "~-._"},
      {"~-._", TRUE, TRUE, "~-._"},
    };

    curlx_dyn_init(&out, 256);
    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
      CURLUcode uc;
      curlx_dyn_reset(&out);
      uc = urlencode_str(&out, tests[i].in, strlen(tests[i].in),
                         tests[i].relative, tests[i].query);
      /* the output is only inspected when the call itself succeeded */
      if(uc || strcmp(curlx_dyn_ptr(&out), tests[i].out)) {
        curl_mfprintf(stderr, "urlencode_str('%s', query=%d) failed:"
                      " expected '%s', got '%s'\n",
                      tests[i].in, tests[i].query, tests[i].out,
                      uc ? "error" : curlx_dyn_ptr(&out));
        fails++;
      }
    }
    curlx_dyn_free(&out);
    abort_if(fails, "urlencode_str tests failed");
  }

  /* Test ipv6_parse */
  {
    struct Curl_URL u;
    int fails = 0;
    unsigned int i;
    struct ipv6_test {
      const char *in;
      const char *out_host; /* NULL when parsing must fail */
      const char *out_zone; /* NULL when no zone id is checked */
    };
    const struct ipv6_test tests[] = {
      {"[::1]", "[::1]", NULL},
      {"[fe80::1%eth0]", "[fe80::1]", "eth0"},
      {"[fe80::1%25eth0]", "[fe80::1]", "eth0"},
      {"[::1", NULL, NULL}, /* missing bracket */
      {"[]", NULL, NULL}, /* empty */
    };

    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
      CURLUcode uc;
      char hostname[256];
      /* fresh struct per round so a zone id never leaks into the next one */
      memset(&u, 0, sizeof(u));
      /* ipv6_parse() edits the hostname in place: work on a private copy */
      curlx_strcopy(hostname, sizeof(hostname),
                    tests[i].in, strlen(tests[i].in));
      uc = ipv6_parse(&u, hostname, strlen(hostname));
      if(tests[i].out_host) {
        if(uc || strcmp(hostname, tests[i].out_host)) {
          curl_mfprintf(stderr, "ipv6_parse('%s') host failed:"
                        " expected '%s', got '%s'\n",
                        tests[i].in, tests[i].out_host,
                        uc ? "error" : hostname);
          fails++;
        }
        if(!uc && tests[i].out_zone) {
          if(!u.zoneid || strcmp(u.zoneid, tests[i].out_zone)) {
            curl_mfprintf(stderr, "ipv6_parse('%s') zone failed:"
                          " expected '%s', got '%s'\n",
                          tests[i].in, tests[i].out_zone,
                          u.zoneid ? u.zoneid : "(null)");
            fails++;
          }
        }
      }
      else {
        if(!uc) {
          curl_mfprintf(stderr, "ipv6_parse('%s') succeeded unexpectedly\n",
                        tests[i].in);
          fails++;
        }
      }
      curlx_free(u.host);
      curlx_free(u.zoneid);
    }
    abort_if(fails, "ipv6_parse tests failed");
  }

  /* Test parse_file */
  {
    CURLU *u;
    struct dynbuf host; /* hostname output buffer handed to parse_file() */
    const char *path;
    size_t pathlen;
    int fails = 0;
    unsigned int i;
    struct file_test {
      const char *in;
      const char *out_host; /* "" when no hostname output is expected */
      const char *out_path;
      bool fine; /* TRUE when parse_file() is expected to succeed */
    };
    const struct file_test tests[] = {
      {"file:///etc/hosts", "", "/etc/hosts", TRUE},
      {"file://localhost/etc/hosts", "", "/etc/hosts", TRUE},
      {"file://apple/etc/hosts", "", "/etc/hosts", FALSE},
#ifdef _WIN32
      {"file:///c:/windows/system32", "", "c:/windows/system32", TRUE},
      {"file://localhost/c:/windows/system32", "",
       "c:/windows/system32", TRUE},
#endif
    };

    curlx_dyn_init(&host, 256);
    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
      CURLUcode uc;
      const char *gothost;

      /* start each round from a known state so the diagnostics below never
         read indeterminate values when parse_file() fails early */
      path = "";
      pathlen = 0;
      curlx_dyn_reset(&host);

      u = curl_url();
      if(!u) {
        curlx_dyn_free(&host);
        return CURLE_OUT_OF_MEMORY;
      }
      uc = parse_file(tests[i].in, strlen(tests[i].in), u, &host, &path,
                      &pathlen);
      /* an untouched dynbuf may have no buffer at all: print "" then */
      gothost = curlx_dyn_len(&host) ? curlx_dyn_ptr(&host) : "";

      if(!tests[i].fine && !uc) {
        curl_mfprintf(stderr, "Unexpectedly fine for input '%s'\n",
                      tests[i].in);
        fails++;
      }
      else if(!uc && tests[i].out_host[0]) {
        /* expecting a hostname output */
        if(!curlx_dyn_len(&host) || strcmp(gothost, tests[i].out_host)) {
          curl_mfprintf(stderr, "parse_file('%s') host failed:"
                        " expected '%s', got '%s'\n",
                        tests[i].in, tests[i].out_host, gothost);
          fails++;
        }
      }
      if(tests[i].fine &&
         (uc ||
          strncmp(path, tests[i].out_path, pathlen) ||
          strlen(tests[i].out_path) != pathlen)) {
        curl_mfprintf(stderr, "parse_file('%s') failed:"
                      " expected host '%s', path '%s'; got host '%s',"
                      " path '%.*s'\n",
                      tests[i].in, tests[i].out_host, tests[i].out_path,
                      uc ? "error" : gothost,
                      (int)pathlen, path);
        fails++;
      }
      curl_url_cleanup(u);
    }
    curlx_dyn_free(&host);
    abort_if(fails, "parse_file tests failed");
  }

  UNITTEST_END_SIMPLE
}