diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h index f635e9ae68..129ee0481f 100644 --- a/lib/urlapi-int.h +++ b/lib/urlapi-int.h @@ -49,6 +49,10 @@ struct Curl_URL { #define HOST_IPV4 2 #define HOST_IPV6 3 +#define QUERY_NO 2 +#define QUERY_NOT_YET 3 /* allow to change to query */ +#define QUERY_YES 4 + size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, bool guess_scheme); diff --git a/lib/urlapi.c b/lib/urlapi.c index 3a254b7bf3..a3036dcf6a 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -115,20 +115,25 @@ static const char *find_host_sep(const char *url) * URL encoding should be skipped for hostnames, otherwise IDN resolution * will fail. * + * 'query' tells if it is a query part or not, or if it is allowed to + * "transition" into a query part with a question mark. + * * @unittest: 1675 */ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, size_t len, bool relative, - bool query); + unsigned int query); UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, size_t len, bool relative, - bool query) + unsigned int query) { /* we must add this with whitespace-replacing */ const unsigned char *iptr; const unsigned char *host_sep = (const unsigned char *)url; CURLcode result = CURLE_OK; + DEBUGASSERT((query >= QUERY_NO) && (query <= QUERY_YES)); + if(!relative) { size_t n; host_sep = (const unsigned char *)find_host_sep(url); @@ -141,7 +146,7 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, for(iptr = host_sep; len && !result; iptr++, len--) { if(*iptr == ' ') { - if(!query) + if(query != QUERY_YES) result = curlx_dyn_addn(o, "%20", 3); else result = curlx_dyn_addn(o, "+", 1); @@ -151,7 +156,8 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, Curl_hexbyte(&out[1], *iptr); result = curlx_dyn_addn(o, out, 3); } - else if(*iptr == '%' && ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) && + else if(*iptr == '%' && (len >= 3) && + ISXDIGIT(iptr[1]) && ISXDIGIT(iptr[2]) && (ISLOWER(iptr[1]) || ISLOWER(iptr[2]))) { /* uppercase it */ unsigned char hex = (unsigned char)((curlx_hexval(iptr[1]) << 4) | @@ -164,8 +170,8 @@ UNITTEST CURLUcode urlencode_str(struct dynbuf *o, const char *url, } else { result = curlx_dyn_addn(o, iptr, 1); - if(*iptr == '?') - query = TRUE; + if(*iptr == '?' && (query == QUERY_NOT_YET)) + query = QUERY_YES; } } @@ -832,15 +838,12 @@ end: * @unittest: 1675 */ UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, - struct dynbuf *host, const char **pathp, - size_t *pathlenp); + const char **pathp, size_t *pathlenp); UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, - struct dynbuf *host, const char **pathp, - size_t *pathlenp) + const char **pathp, size_t *pathlenp) { const char *path; size_t pathlen; - bool uncpath = FALSE; if(urllen <= 6) /* file:/ is not enough to actually be a complete file: URL */ return CURLUE_BAD_FILE_URL; @@ -872,9 +875,6 @@ UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, * * o the hostname is a FQDN that resolves to this machine, or * - * o it is an UNC String transformed to an URI (Windows only, RFC 8089 - * Appendix E.3). - * * For brevity, we only consider URLs with empty, "localhost", or * "127.0.0.1" hostnames as local, otherwise as an UNC String. * @@ -889,42 +889,16 @@ UNITTEST CURLUcode parse_file(const char *url, size_t urllen, CURLU *u, checkprefix("127.0.0.1/", ptr)) { ptr += 9; /* now points to the slash after the host */ } - else { -#ifdef _WIN32 - size_t len; - - /* the hostname, NetBIOS computer name, can not contain disallowed - chars, and the delimiting slash character must be appended to the - hostname */ - path = strpbrk(ptr, "/\\:*?\"<>|"); - if(!path || *path != '/') - return CURLUE_BAD_FILE_URL; - - len = path - ptr; - if(len) { - CURLcode code = curlx_dyn_addn(host, ptr, len); - if(code) - return cc2cu(code); - uncpath = TRUE; - } - - ptr -= 2; /* now points to the // before the host in UNC */ -#else + else /* Invalid file://hostname/, expected localhost or 127.0.0.1 or none */ return CURLUE_BAD_FILE_URL; -#endif - } } path = ptr; pathlen = urllen - (ptr - url); } - if(!uncpath) - /* no host for file: URLs by default */ - curlx_dyn_reset(host); - #if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__) /* Do not allow Windows drive letters when not in Windows. * This catches both "file:/c:" and "file:c:" */ @@ -1033,7 +1007,7 @@ static CURLUcode handle_fragment(CURLU *u, const char *fragment, if(flags & CURLU_URLENCODE) { struct dynbuf enc; curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE); + ures = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, QUERY_NO); if(ures) return ures; u->fragment = curlx_dyn_ptr(&enc); @@ -1057,7 +1031,7 @@ static CURLUcode handle_query(CURLU *u, const char *query, CURLUcode ures; curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); /* skip the leading question mark */ - ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE); + ures = urlencode_str(&enc, query + 1, qlen - 1, TRUE, QUERY_YES); if(ures) return ures; u->query = curlx_dyn_ptr(&enc); @@ -1085,7 +1059,7 @@ static CURLUcode handle_path(CURLU *u, const char *path, if(pathlen && (flags & CURLU_URLENCODE)) { struct dynbuf enc; curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - ures = urlencode_str(&enc, path, pathlen, TRUE, FALSE); + ures = urlencode_str(&enc, path, pathlen, TRUE, QUERY_NO); if(ures) return ures; pathlen = curlx_dyn_len(&enc); @@ -1145,7 +1119,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) /* handle the file: scheme */ if(schemelen && !strcmp(schemebuf, "file")) { is_file = TRUE; - ures = parse_file(url, urllen, u, &host, &path, &pathlen); + ures = parse_file(url, urllen, u, &path, &pathlen); } else { const char *hostp = NULL; @@ -1287,7 +1261,8 @@ static CURLUcode redirect_url(const char *base, const char *relurl, curlx_dyn_init(&urlbuf, CURL_MAX_INPUT_LENGTH); if(!curlx_dyn_addn(&urlbuf, base, prelen) && - !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, FALSE)) { + !urlencode_str(&urlbuf, useurl, strlen(useurl), !host_changed, + QUERY_NOT_YET)) { uc = parseurl_and_replace(curlx_dyn_ptr(&urlbuf), u, flags & ~U_CURLU_PATH_AS_IS); } @@ -1407,7 +1382,8 @@ static CURLUcode urlget_format(const CURLU *u, CURLUPart what, if(urlencode) { struct dynbuf enc; curlx_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); - uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY); + uc = urlencode_str(&enc, part, partlen, TRUE, what == CURLUPART_QUERY ? + QUERY_YES : QUERY_NO); curlx_free(part); if(uc) return uc; diff --git a/tests/unit/unit1675.c b/tests/unit/unit1675.c index e43ac1e7fe..25616c6727 100644 --- a/tests/unit/unit1675.c +++ b/tests/unit/unit1675.c @@ -145,35 +145,37 @@ static CURLcode test_unit1675(const char *arg) struct urlencode_test { const char *in; bool relative; - bool query; + unsigned int query; const char *out; }; const struct urlencode_test tests[] = { - {"http://leave\x01/hello\x01world", FALSE, FALSE, + {"http://leave\x01/hello\x01world", FALSE, QUERY_NO, "http://leave\x01/hello%01world"}, - {"http://leave/hello\x01world", FALSE, FALSE, + {"http://leave/hello\x01world", FALSE, QUERY_NO, "http://leave/hello%01world"}, - {"http://le ave/hello\x01world", FALSE, FALSE, + {"http://le ave/hello\x01world", FALSE, QUERY_NO, "http://le ave/hello%01world"}, - {"hello\x01world", TRUE, FALSE, "hello%01world"}, - {"hello\xf0world", TRUE, FALSE, "hello%F0world"}, - {"hello world", TRUE, FALSE, "hello%20world"}, - {"hello%20world", TRUE, FALSE, "hello%20world"}, - {"hello world", TRUE, TRUE, "hello+world"}, - {"a+b c", TRUE, FALSE, "a+b%20c"}, - {"a%20b%20c", TRUE, FALSE, "a%20b%20c"}, - {"a%aab%aac", TRUE, FALSE, "a%AAb%AAc"}, - {"a%aab%AAc", TRUE, FALSE, "a%AAb%AAc"}, - {"w%w%x", TRUE, FALSE, "w%w%x"}, - {"w%wf%xf", TRUE, FALSE, "w%wf%xf"}, - {"w%fw%fw", TRUE, FALSE, "w%fw%fw"}, - {"a+b c", TRUE, TRUE, "a+b+c"}, - {"/foo/bar", TRUE, FALSE, "/foo/bar"}, - {"/foo/bar", TRUE, TRUE, "/foo/bar"}, - {"/foo/ bar", TRUE, FALSE, "/foo/%20bar"}, - {"/foo/ bar", TRUE, TRUE, "/foo/+bar"}, - {"~-._", TRUE, FALSE, "~-._"}, - {"~-._", TRUE, TRUE, "~-._"}, + {"hello\x01world", TRUE, QUERY_NO, "hello%01world"}, + {"hello\xf0world", TRUE, QUERY_NO, "hello%F0world"}, + {"hello world", TRUE, QUERY_NO, "hello%20world"}, + {"hello%20world", TRUE, QUERY_NO, "hello%20world"}, + {"hello world", TRUE, QUERY_YES, "hello+world"}, + {"a+b c", TRUE, QUERY_NO, "a+b%20c"}, + {"a%20b%20c", TRUE, QUERY_NO, "a%20b%20c"}, + {"a%aab%aac", TRUE, QUERY_NO, "a%AAb%AAc"}, + {"a%aab%AAc", TRUE, QUERY_NO, "a%AAb%AAc"}, + {"w%w%x", TRUE, QUERY_NO, "w%w%x"}, + {"w%wf%xf", TRUE, QUERY_NO, "w%wf%xf"}, + {"w%fw%fw", TRUE, QUERY_NO, "w%fw%fw"}, + {"a+b c", TRUE, QUERY_YES, "a+b+c"}, + {"/foo/bar", TRUE, QUERY_NO, "/foo/bar"}, + {"/foo/bar", TRUE, QUERY_YES, "/foo/bar"}, + {"/foo/ bar", TRUE, QUERY_NO, "/foo/%20bar"}, + {"/foo/ bar", TRUE, QUERY_YES, "/foo/+bar"}, + {"~-._", TRUE, QUERY_NO, "~-._"}, + {"~-._", TRUE, QUERY_YES, "~-._"}, + {"foo bar?foo bar", TRUE, QUERY_NO, "foo%20bar?foo%20bar"}, + {"foo bar?foo bar", TRUE, QUERY_NOT_YET, "foo%20bar?foo+bar"}, }; curlx_dyn_init(&out, 256); @@ -259,18 +261,16 @@ static CURLcode test_unit1675(const char *arg) unsigned int i; struct file_test { const char *in; - const char *out_host; const char *out_path; bool fine; }; const struct file_test tests[] = { - {"file:///etc/hosts", "", "/etc/hosts", TRUE}, - {"file://localhost/etc/hosts", "", "/etc/hosts", TRUE}, - {"file://apple/etc/hosts", "", "/etc/hosts", FALSE}, + {"file:///etc/hosts", "/etc/hosts", TRUE}, + {"file://localhost/etc/hosts", "/etc/hosts", TRUE}, + {"file://apple/etc/hosts", "/etc/hosts", FALSE}, #ifdef _WIN32 - {"file:///c:/windows/system32", "", "c:/windows/system32", TRUE}, - {"file://localhost/c:/windows/system32", "", - "c:/windows/system32", TRUE}, + {"file:///c:/windows/system32", "c:/windows/system32", TRUE}, + {"file://localhost/c:/windows/system32", "c:/windows/system32", TRUE}, #endif }; @@ -280,28 +280,19 @@ static CURLcode test_unit1675(const char *arg) if(!u) return CURLE_OUT_OF_MEMORY; - uc = parse_file(tests[i].in, strlen(tests[i].in), u, &host, &path, - &pathlen); + uc = parse_file(tests[i].in, strlen(tests[i].in), u, &path, &pathlen); if(!tests[i].fine && !uc) { curl_mfprintf(stderr, "Unexpectedly fine for input '%s'\n", tests[i].in); fails++; } - else if(tests[i].out_host[0]) { - /* expecting a hostname output */ - if(!curlx_dyn_len(&host) || - strcmp(curlx_dyn_ptr(&host), tests[i].out_host)) - error = TRUE; - } if(tests[i].fine && (uc || strncmp(path, tests[i].out_path, pathlen) || strlen(tests[i].out_path) != pathlen)) { curl_mfprintf(stderr, "parse_file('%s') failed:" - " expected host '%s', path '%s'; got host '%s'," - " path '%.*s'\n", - tests[i].in, tests[i].out_host, tests[i].out_path, - uc ? "error" : curlx_dyn_ptr(&host), + " expected path '%s'; got path '%.*s'\n", + tests[i].in, tests[i].out_path, (int)pathlen, path); fails++; }