From 0b4ebebb06b3148a234a5a7bc95f8253dd6cb8df Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Mon, 13 Apr 2026 12:46:45 +0200 Subject: [PATCH] test1675: unit tests for URL API helper functions - ipv4_normalize - urlencode_str - ipv6_parse - parse_file urlapi: make the string URL encoder normalize to uppercase percent-encoding Closes #21296 --- lib/urlapi-int.h | 24 +++ lib/urlapi.c | 92 +++++++----- tests/data/Makefile.am | 2 +- tests/data/test1675 | 18 +++ tests/data/test58 | 2 +- tests/libtest/lib1560.c | 6 +- tests/unit/Makefile.inc | 2 +- tests/unit/unit1675.c | 314 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 414 insertions(+), 46 deletions(-) create mode 100644 tests/data/test1675 create mode 100644 tests/unit/unit1675.c diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h index 1d29e545de..f635e9ae68 100644 --- a/lib/urlapi-int.h +++ b/lib/urlapi-int.h @@ -24,6 +24,30 @@ * ***************************************************************************/ #include "curl_setup.h" +#include + +/* Internal representation of CURLU. Point to URL-encoded strings. */ +struct Curl_URL { + char *scheme; + char *user; + char *password; + char *options; /* IMAP only? */ + char *host; + char *zoneid; /* for numerical IPv6 addresses */ + char *port; + char *path; + char *query; + char *fragment; + unsigned short portnum; /* the numerical version (if 'port' is set) */ + BIT(query_present); /* to support blank */ + BIT(fragment_present); /* to support blank */ + BIT(guessed_scheme); /* when a URL without scheme is parsed */ +}; + +#define HOST_ERROR (-1) /* out of memory */ +#define HOST_NAME 1 +#define HOST_IPV4 2 +#define HOST_IPV6 3 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, bool guess_scheme); diff --git a/lib/urlapi.c b/lib/urlapi.c index 1d9e949bc9..3a254b7bf3 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -63,24 +63,6 @@ #define AF_INET6 (AF_INET + 1) #endif -/* Internal representation of CURLU. Point to URL-encoded strings. 
*/ -struct Curl_URL { - char *scheme; - char *user; - char *password; - char *options; /* IMAP only? */ - char *host; - char *zoneid; /* for numerical IPv6 addresses */ - char *port; - char *path; - char *query; - char *fragment; - unsigned short portnum; /* the numerical version (if 'port' is set) */ - BIT(query_present); /* to support blank */ - BIT(fragment_present); /* to support blank */ - BIT(guessed_scheme); /* when a URL without scheme is parsed */ -}; - #define DEFAULT_SCHEME "https" static void free_urlhandle(struct Curl_URL *u) @@ -124,15 +106,25 @@ static const char *find_host_sep(const char *url) /* urlencode_str() writes data into an output dynbuf and URL-encodes the * spaces in the source URL accordingly. * + * This function re-encodes the string, meaning that it leaves already encoded + * bytes as-is and works by encoding only what *has* to be encoded - unless it + * has to uppercase the hex to normalize. + * + * Illegal percent-encoding sequences are left as-is. + * * URL encoding should be skipped for hostnames, otherwise IDN resolution * will fail. 
+ * + * @unittest: 1675 */ -static CURLUcode urlencode_str(struct dynbuf *o, const char *url, - size_t len, bool relative, - bool query) +UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, + size_t len, bool relative, + bool query); +UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, + size_t len, bool relative, + bool query) { /* we must add this with whitespace-replacing */ - bool left = !query; const unsigned char *iptr; const unsigned char *host_sep = (const unsigned char *)url; CURLcode result = CURLE_OK; @@ -149,7 +141,7 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url, for(iptr = host_sep; len && !result; iptr++, len--) { if(*iptr == ' ') { - if(left) + if(!query) result = curlx_dyn_addn(o, "%20", 3); else result = curlx_dyn_addn(o, "+", 1); @@ -159,10 +151,21 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url, Curl_hexbyte(&out[1], *iptr); result = curlx_dyn_addn(o, out, 3); } + else if(*iptr == '%' && ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) && + (ISLOWER(iptr[1]) || ISLOWER(iptr[2]))) { + /* uppercase it */ + unsigned char hex = (unsigned char)((curlx_hexval(iptr[1]) << 4) | + curlx_hexval(iptr[2])); + unsigned char out[3] = { '%' }; + Curl_hexbyte(&out[1], hex); + result = curlx_dyn_addn(o, out, 3); + iptr += 2; + len -= 2; + } else { result = curlx_dyn_addn(o, iptr, 1); if(*iptr == '?') - left = FALSE; + query = TRUE; } } @@ -388,9 +391,15 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, return CURLUE_OK; } -/* this assumes 'hostname' now starts with [ */ -static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, - size_t hlen) /* length of hostname */ +/* This function assumes 'hostname' now starts with [. It trims 'hostname' in + * place and it sets u->zoneid if present. 
+ * + * @unittest: 1675 + */ +UNITTEST CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, + size_t hlen); +UNITTEST CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, + size_t hlen) /* length of hostname */ { size_t len; DEBUGASSERT(*hostname == '['); @@ -443,8 +452,10 @@ static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, return CURLUE_OK; } -static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, - size_t hlen) /* length of hostname */ +UNITTEST CURLUcode hostname_check(struct Curl_URL *u, char *hostname, + size_t hlen); +UNITTEST CURLUcode hostname_check(struct Curl_URL *u, char *hostname, + size_t hlen) /* length of hostname */ { size_t len; DEBUGASSERT(hostname); @@ -474,15 +485,12 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname, * integers. * * Returns the host type. + * + * @unittest: 1675 */ -#define HOST_ERROR (-1) /* out of memory */ - -#define HOST_NAME 1 -#define HOST_IPV4 2 -#define HOST_IPV6 3 - -static int ipv4_normalize(struct dynbuf *host) +UNITTEST int ipv4_normalize(struct dynbuf *host); +UNITTEST int ipv4_normalize(struct dynbuf *host) { bool done = FALSE; int n = 0; @@ -820,9 +828,15 @@ end: return result ? 
1 : 0; /* success */ } -static CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, - struct dynbuf *host, const char **pathp, - size_t *pathlenp) +/* + * @unittest: 1675 + */ +UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, + struct dynbuf *host, const char **pathp, + size_t *pathlenp); +UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, + struct dynbuf *host, const char **pathp, + size_t *pathlenp) { const char *path; size_t pathlen; diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 6c3ad8e34c..44ff75668f 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -223,7 +223,7 @@ test1650 test1651 test1652 test1653 test1654 test1655 test1656 test1657 \ test1658 test1659 test1660 test1661 test1662 test1663 test1664 test1665 \ test1666 test1667 test1668 test1669 \ \ -test1670 test1671 test1672 test1673 test1674 \ +test1670 test1671 test1672 test1673 test1674 test1675 \ \ test1680 test1681 test1682 test1683 test1684 test1685 \ \ diff --git a/tests/data/test1675 b/tests/data/test1675 new file mode 100644 index 0000000000..e8ee8b8e1c --- /dev/null +++ b/tests/data/test1675 @@ -0,0 +1,18 @@ + + + + +unittest +urlapi + + + + + +unittest + + +URL API unit tests + + + diff --git a/tests/data/test58 b/tests/data/test58 index 432ab4f291..6126f1a349 100644 --- a/tests/data/test58 +++ b/tests/data/test58 @@ -37,7 +37,7 @@ a few bytes # Verify data after the test has been "shot" -PUT /we/want/%TESTNUMBERte%5b%5dst.txt HTTP/1.1 +PUT /we/want/%TESTNUMBERte%5B%5Dst.txt HTTP/1.1 Host: %HOSTIP:%HTTPPORT User-Agent: curl/%VERSION Accept: */* diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 1b9416a242..0e203dd96f 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -438,11 +438,9 @@ static const struct testcase get_parts_list[] = { {"file:///C:\\programs\\foo", "file | [11] | [12] | [13] | [14] | [15] | C:\\programs\\foo | [16] | [17]", CURLU_DEFAULT_SCHEME, 0, 
CURLUE_OK}, - {"file://host.example.com/Share/path/to/file.txt", - "file | [11] | [12] | [13] | host.example.com | [15] | " - "//host.example.com/Share/path/to/file.txt | [16] | [17]", - CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, #endif + {"file://host.example.com/Share/path/to/file.txt", "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_BAD_FILE_URL}, {"https://example.com/color/#green?no-red", "https | [11] | [12] | [13] | example.com | [15] | /color/ | [16] | " "green?no-red", diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc index 0ab45a4df1..102c15f3b2 100644 --- a/tests/unit/Makefile.inc +++ b/tests/unit/Makefile.inc @@ -42,7 +42,7 @@ TESTS_C = \ unit1650.c unit1651.c unit1652.c unit1653.c unit1654.c unit1655.c unit1656.c \ unit1657.c unit1658.c unit1660.c unit1661.c unit1663.c unit1664.c \ unit1666.c unit1667.c unit1668.c unit1669.c \ - unit1674.c \ + unit1674.c unit1675.c \ unit1979.c unit1980.c \ unit2600.c unit2601.c unit2602.c unit2603.c unit2604.c unit2605.c \ unit3200.c unit3205.c \ diff --git a/tests/unit/unit1675.c b/tests/unit/unit1675.c new file mode 100644 index 0000000000..e43ac1e7fe --- /dev/null +++ b/tests/unit/unit1675.c @@ -0,0 +1,314 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at https://curl.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. 
+ *
+ * SPDX-License-Identifier: curl
+ *
+ ***************************************************************************/
+#include "unitcheck.h"
+#include "urlapi-int.h"
+#include "curlx/dynbuf.h"
+
+/*
+ * test_unit1675() runs table-driven checks of four URL API helpers:
+ * ipv4_normalize(), urlencode_str(), ipv6_parse() and parse_file().
+ *
+ * Each section walks its own table, prints one line to stderr per
+ * mismatch, counts the mismatches in 'fails' and hands that count to
+ * abort_if() once the table is done.
+ *
+ * Returns CURLE_OUT_OF_MEMORY if a dynbuf or URL handle cannot be set up;
+ * otherwise the result is whatever UNITTEST_END_SIMPLE produces.
+ */
+static CURLcode test_unit1675(const char *arg)
+{
+  (void)arg;
+  UNITTEST_BEGIN_SIMPLE
+
+  /* Test ipv4_normalize */
+  {
+    struct dynbuf host;
+    int fails = 0;
+    unsigned int i;
+    struct ipv4_test {
+      const char *in;
+      const char *out; /* NULL: must not be reported as HOST_IPV4 */
+    };
+    const struct ipv4_test tests[] = {
+      {"0x.0x.0x.0x", NULL}, /* invalid hex */
+      {"0x.0x.0x", NULL}, /* invalid hex */
+      {"0x.0x", NULL}, /* invalid hex */
+      {"0x", NULL}, /* invalid hex */
+      {"0", "0.0.0.0"},
+      {"00", "0.0.0.0"},
+      {"00000000000", "0.0.0.0"},
+      {"127.0.0.1", "127.0.0.1"},
+      {"0177.0.0.1", "127.0.0.1"},
+      {"00177.0.0.1", "127.0.0.1"},
+      {"0x7f.0.0.1", "127.0.0.1"},
+      {"0x07f.0.0.1", "127.0.0.1"},
+      {"1", "0.0.0.1"},
+      {"010", "0.0.0.8"},
+      {"001", "0.0.0.1"},
+      {"127", "0.0.0.127"},
+      {"127.1", "127.0.0.1"},
+      {"127.0.1", "127.0.0.1"},
+      {"1.16777215", "1.255.255.255"},
+      {"1.16777216", NULL}, /* overflow */
+      {"1.1.65535", "1.1.255.255"},
+      {"1.1.65536", NULL}, /* overflow */
+      {"0x7f000001", "127.0.0.1"},
+      {"0x7F000001", "127.0.0.1"},
+      {"0x7g000001", NULL}, /* bad hex */
+      {"2130706433", "127.0.0.1"},
+      {"017700000001", "127.0.0.1"},
+      {"000000000017700000001", "127.0.0.1"},
+      {"192.168.0.1", "192.168.0.1"},
+      {"0300.0250.0000.0001", "192.168.0.1"},
+      {"0xc0.0xa8.0.1", "192.168.0.1"},
+      {"0xc0a80001", "192.168.0.1"},
+      {"3232235521", "192.168.0.1"},
+      {"4294967294", "255.255.255.254"},
+      {"4294967295", "255.255.255.255"},
+      {"037777777777", "255.255.255.255"},
+      {"0xFFFFFFFF", "255.255.255.255"},
+      {"0xFFFFFfff", "255.255.255.255"},
+      {"1.2.3.4.5", NULL}, /* too many parts */
+      {"256.0.0.1", NULL}, /* overflow */
+      {"1.256.0.1", NULL}, /* overflow */
+      {"1.1.256.1", NULL}, /* overflow */
+      {"1.0.0.256", NULL}, /* overflow */
+      {"0x100.0.0.1", NULL}, /* overflow */
+      {"1.0x100.0.1", NULL}, /* overflow */
+      {"1.1.0x100.1", NULL}, /* overflow */
+      {"1.1.1.0x100", NULL}, /* overflow */
+      {"0400.0.0.1", NULL}, /* overflow */
+      {"4.0400.0.1", NULL}, /* overflow */
+      {"4.4.0400.1", NULL}, /* overflow */
+      {"4.4.4.0400", NULL}, /* overflow */
+      {"4294967296", NULL}, /* overflow */
+      {"040000000000", NULL}, /* overflow */
+      {"0x100000000", NULL}, /* overflow */
+      {"1.2.3.-4", NULL}, /* negative */
+      {"1.2.-3.4", NULL}, /* negative */
+      {"1.-2.3.4", NULL}, /* negative */
+      {"-1.2.3.4", NULL}, /* negative */
+      {"-12", NULL}, /* negative */
+      {"-12.1", NULL}, /* negative */
+      {"-12.2.3", NULL}, /* negative */
+      {" 1.2.3.4", NULL}, /* space */
+      {"1. 2.3.4", NULL}, /* space */
+      {"1.2. 3.4", NULL}, /* space */
+      {"1.2.3. 4", NULL}, /* space */
+    };
+
+    curlx_dyn_init(&host, 256);
+    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
+      int rc;
+      curlx_dyn_reset(&host);
+      if(curlx_dyn_add(&host, tests[i].in)) {
+        return CURLE_OUT_OF_MEMORY;
+      }
+      /* the function rewrites 'host' in place */
+      rc = ipv4_normalize(&host);
+      if(tests[i].out) {
+        if((rc != HOST_IPV4) ||
+           strcmp(curlx_dyn_ptr(&host), tests[i].out)) {
+          curl_mfprintf(stderr, "ipv4_normalize('%s') failed: "
+                        "expected '%s', got '%s'\n",
+                        tests[i].in, tests[i].out, curlx_dyn_ptr(&host));
+          fails++;
+        }
+      }
+      else {
+        if(rc == HOST_IPV4) {
+          curl_mfprintf(stderr, "ipv4_normalize('%s') succeeded unexpectedly:"
+                        " got '%s'\n",
+                        tests[i].in, curlx_dyn_ptr(&host));
+          fails++;
+        }
+      }
+    }
+    curlx_dyn_free(&host);
+    abort_if(fails, "ipv4_normalize tests failed");
+  }
+
+  /* Test urlencode_str */
+  {
+    struct dynbuf out;
+    int fails = 0;
+    unsigned int i;
+    struct urlencode_test {
+      const char *in;
+      bool relative;
+      bool query;
+      const char *out;
+    };
+    const struct urlencode_test tests[] = {
+      {"http://leave\x01/hello\x01world", FALSE, FALSE,
+       "http://leave\x01/hello%01world"},
+      {"http://leave/hello\x01world", FALSE, FALSE,
+       "http://leave/hello%01world"},
+      {"http://le ave/hello\x01world", FALSE, FALSE,
+       "http://le ave/hello%01world"},
+      {"hello\x01world", TRUE, FALSE, "hello%01world"},
+      {"hello\xf0world", TRUE, FALSE, "hello%F0world"},
+      {"hello world", TRUE, FALSE, "hello%20world"},
+      {"hello%20world", TRUE, FALSE, "hello%20world"},
+      {"hello world", TRUE, TRUE, "hello+world"},
+      {"a+b c", TRUE, FALSE, "a+b%20c"},
+      {"a%20b%20c", TRUE, FALSE, "a%20b%20c"},
+      {"a%aab%aac", TRUE, FALSE, "a%AAb%AAc"},
+      {"a%aab%AAc", TRUE, FALSE, "a%AAb%AAc"},
+      {"w%w%x", TRUE, FALSE, "w%w%x"},
+      {"w%wf%xf", TRUE, FALSE, "w%wf%xf"},
+      {"w%fw%fw", TRUE, FALSE, "w%fw%fw"},
+      {"a+b c", TRUE, TRUE, "a+b+c"},
+      {"/foo/bar", TRUE, FALSE, "/foo/bar"},
+      {"/foo/bar", TRUE, TRUE, "/foo/bar"},
+      {"/foo/ bar", TRUE, FALSE, "/foo/%20bar"},
+      {"/foo/ bar", TRUE, TRUE, "/foo/+bar"},
+      {"~-._", TRUE, FALSE, "~-._"},
+      {"~-._", TRUE, TRUE, "~-._"},
+    };
+
+    curlx_dyn_init(&out, 256);
+    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
+      CURLUcode uc;
+      curlx_dyn_reset(&out);
+      uc = urlencode_str(&out, tests[i].in, strlen(tests[i].in),
+                         tests[i].relative, tests[i].query);
+      if(uc || strcmp(curlx_dyn_ptr(&out), tests[i].out)) {
+        curl_mfprintf(stderr, "urlencode_str('%s', query=%d) failed:"
+                      " expected '%s', got '%s'\n",
+                      tests[i].in, tests[i].query, tests[i].out,
+                      uc ? "error" : curlx_dyn_ptr(&out));
+        fails++;
+      }
+    }
+    curlx_dyn_free(&out);
+    abort_if(fails, "urlencode_str tests failed");
+  }
+
+  /* Test ipv6_parse */
+  {
+    struct Curl_URL u;
+    int fails = 0;
+    unsigned int i;
+    struct ipv6_test {
+      const char *in;
+      const char *out_host; /* NULL: the parse must fail */
+      const char *out_zone; /* NULL: zone id is not checked */
+    };
+    const struct ipv6_test tests[] = {
+      {"[::1]", "[::1]", NULL},
+      {"[fe80::1%eth0]", "[fe80::1]", "eth0"},
+      {"[fe80::1%25eth0]", "[fe80::1]", "eth0"},
+      {"[::1", NULL, NULL}, /* missing bracket */
+      {"[]", NULL, NULL}, /* empty */
+    };
+
+    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
+      CURLUcode uc;
+      char hostname[256];
+      /* fresh handle per case so a zone id never leaks into the next */
+      memset(&u, 0, sizeof(u));
+      /* work on a writable copy, the result is compared in 'hostname' */
+      curlx_strcopy(hostname, sizeof(hostname),
+                    tests[i].in, strlen(tests[i].in));
+      uc = ipv6_parse(&u, hostname, strlen(hostname));
+      if(tests[i].out_host) {
+        if(uc || strcmp(hostname, tests[i].out_host)) {
+          curl_mfprintf(stderr, "ipv6_parse('%s') host failed:"
+                        " expected '%s', got '%s'\n",
+                        tests[i].in, tests[i].out_host,
+                        uc ? "error" : hostname);
+          fails++;
+        }
+        if(!uc && tests[i].out_zone) {
+          if(!u.zoneid || strcmp(u.zoneid, tests[i].out_zone)) {
+            curl_mfprintf(stderr, "ipv6_parse('%s') zone failed:"
+                          " expected '%s', got '%s'\n",
+                          tests[i].in, tests[i].out_zone,
+                          u.zoneid ? u.zoneid : "(null)");
+            fails++;
+          }
+        }
+      }
+      else {
+        if(!uc) {
+          curl_mfprintf(stderr, "ipv6_parse('%s') succeeded unexpectedly\n",
+                        tests[i].in);
+          fails++;
+        }
+      }
+      curlx_free(u.host);
+      curlx_free(u.zoneid);
+    }
+    abort_if(fails, "ipv6_parse tests failed");
+  }
+
+  /* Test parse_file */
+  {
+    CURLU *u;
+    struct dynbuf host; /* hostname output buffer handed to parse_file() */
+    const char *path;
+    size_t pathlen;
+    int fails = 0;
+    unsigned int i;
+    struct file_test {
+      const char *in;
+      const char *out_host; /* "" means: no hostname output to verify */
+      const char *out_path;
+      bool fine; /* TRUE when parse_file() is expected to succeed */
+    };
+    const struct file_test tests[] = {
+      {"file:///etc/hosts", "", "/etc/hosts", TRUE},
+      {"file://localhost/etc/hosts", "", "/etc/hosts", TRUE},
+      {"file://apple/etc/hosts", "", "/etc/hosts", FALSE},
+#ifdef _WIN32
+      {"file:///c:/windows/system32", "", "c:/windows/system32", TRUE},
+      {"file://localhost/c:/windows/system32", "",
+       "c:/windows/system32", TRUE},
+#endif
+    };
+
+    curlx_dyn_init(&host, 256);
+    for(i = 0; i < CURL_ARRAYSIZE(tests); i++) {
+      CURLUcode uc;
+      u = curl_url();
+      if(!u) {
+        curlx_dyn_free(&host);
+        return CURLE_OUT_OF_MEMORY;
+      }
+      /* start every case from a known state so the failure report below
+         never prints leftovers from a previous case or indeterminate
+         values when parse_file() bails out early */
+      curlx_dyn_reset(&host);
+      path = "";
+      pathlen = 0;
+
+      uc = parse_file(tests[i].in, strlen(tests[i].in), u, &host, &path,
+                      &pathlen);
+      if(!tests[i].fine && !uc) {
+        curl_mfprintf(stderr, "Unexpectedly fine for input '%s'\n",
+                      tests[i].in);
+        fails++;
+      }
+      else if(tests[i].out_host[0]) {
+        /* expecting a hostname output */
+        if(!curlx_dyn_len(&host) ||
+           strcmp(curlx_dyn_ptr(&host), tests[i].out_host)) {
+          /* count it, a mismatch that is only noted would pass silently */
+          curl_mfprintf(stderr, "parse_file('%s') host failed:"
+                        " expected '%s', got '%s'\n",
+                        tests[i].in, tests[i].out_host,
+                        curlx_dyn_len(&host) ? curlx_dyn_ptr(&host) : "");
+          fails++;
+        }
+      }
+      /* 'path' is not null-terminated at 'pathlen': compare that many
+         bytes and then require the lengths to be equal */
+      if(tests[i].fine &&
+         (uc ||
+          strncmp(path, tests[i].out_path, pathlen) ||
+          strlen(tests[i].out_path) != pathlen)) {
+        curl_mfprintf(stderr, "parse_file('%s') failed:"
+                      " expected host '%s', path '%s'; got host '%s',"
+                      " path '%.*s'\n",
+                      tests[i].in, tests[i].out_host, tests[i].out_path,
+                      uc ? "error" : curlx_dyn_ptr(&host),
+                      (int)pathlen, path);
+        fails++;
+      }
+      curl_url_cleanup(u);
+    }
+    curlx_dyn_free(&host);
+    abort_if(fails, "parse_file tests failed");
+  }
+
+  UNITTEST_END_SIMPLE
+}