See header file and man pages for API. All documented API details work
and are tested in the 1560 test case.

Closes #2842
This commit is contained in:
Daniel Stenberg 2018-08-05 11:51:07 +02:00
parent 17ca0ccff4
commit fb30ac5a2d
No known key found for this signature in database
GPG key ID: 5CC908FDB71E12C2
24 changed files with 2814 additions and 365 deletions

View file

@ -55,7 +55,7 @@ LIB_CFILES = file.c timeval.c base64.c hostip.c progress.c formdata.c \
curl_multibyte.c hostcheck.c conncache.c pipeline.c dotdot.c \
x509asn1.c http2.c smb.c curl_endian.c curl_des.c system_win32.c \
mime.c sha256.c setopt.c curl_path.c curl_ctype.c curl_range.c psl.c \
doh.c
doh.c urlapi.c
LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \
formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h if2ip.h \
@ -75,7 +75,7 @@ LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \
curl_setup_once.h multihandle.h setup-vms.h pipeline.h dotdot.h \
x509asn1.h http2.h sigpipe.h smb.h curl_endian.h curl_des.h \
curl_printf.h system_win32.h rand.h mime.h curl_sha256.h setopt.h \
curl_path.h curl_ctype.h curl_range.h psl.h doh.h
curl_path.h curl_ctype.h curl_range.h psl.h doh.h urlapi-int.h
LIB_RCFILES = libcurl.rc

View file

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -41,7 +41,7 @@
its behavior is altered by the current locale.
See https://tools.ietf.org/html/rfc3986#section-2.3
*/
static bool Curl_isunreserved(unsigned char in)
bool Curl_isunreserved(unsigned char in)
{
switch(in) {
case '0': case '1': case '2': case '3': case '4':
@ -141,6 +141,8 @@ char *curl_easy_escape(struct Curl_easy *data, const char *string,
* Returns a pointer to a malloced string in *ostring with length given in
* *olen. If length == 0, the length is assumed to be strlen(string).
*
* 'data' can be set to NULL but then this function can't convert network
* data to host for non-ascii.
*/
CURLcode Curl_urldecode(struct Curl_easy *data,
const char *string, size_t length,
@ -151,7 +153,7 @@ CURLcode Curl_urldecode(struct Curl_easy *data,
char *ns = malloc(alloc);
size_t strindex = 0;
unsigned long hex;
CURLcode result;
CURLcode result = CURLE_OK;
if(!ns)
return CURLE_OUT_OF_MEMORY;
@ -171,11 +173,13 @@ CURLcode Curl_urldecode(struct Curl_easy *data,
in = curlx_ultouc(hex); /* this long is never bigger than 255 anyway */
result = Curl_convert_from_network(data, (char *)&in, 1);
if(result) {
/* Curl_convert_from_network calls failf if unsuccessful */
free(ns);
return result;
if(data) {
result = Curl_convert_from_network(data, (char *)&in, 1);
if(result) {
/* Curl_convert_from_network calls failf if unsuccessful */
free(ns);
return result;
}
}
string += 2;

View file

@ -7,7 +7,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2011, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -24,6 +24,7 @@
/* Escape and unescape URL encoding in strings. The functions return a new
* allocated string or NULL if an error occurred. */
bool Curl_isunreserved(unsigned char in);
CURLcode Curl_urldecode(struct Curl_easy *data,
const char *string, size_t length,
char **ostring, size_t *olen,

View file

@ -159,7 +159,8 @@ const struct Curl_handler Curl_handler_imaps = {
ZERO_NULL, /* connection_check */
PORT_IMAPS, /* defport */
CURLPROTO_IMAPS, /* protocol */
PROTOPT_CLOSEACTION | PROTOPT_SSL /* flags */
PROTOPT_CLOSEACTION | PROTOPT_SSL | /* flags */
PROTOPT_URLOPTIONS
};
#endif

View file

@ -75,6 +75,7 @@
#include "http2.h"
#include "mime.h"
#include "strcase.h"
#include "urlapi-int.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@ -1454,311 +1455,6 @@ CURLcode Curl_posttransfer(struct Curl_easy *data)
return CURLE_OK;
}
#ifndef CURL_DISABLE_HTTP
/*
* Find the separator at the end of the host name, or the '?' in cases like
* http://www.url.com?id=2380
*/
static const char *find_host_sep(const char *url)
{
const char *sep;
const char *query;
/* Find the start of the hostname */
sep = strstr(url, "//");
if(!sep)
sep = url;
else
sep += 2;
query = strchr(sep, '?');
sep = strchr(sep, '/');
if(!sep)
sep = url + strlen(url);
if(!query)
query = url + strlen(url);
return sep < query ? sep : query;
}
/*
* Decide in an encoding-independent manner whether a character in an
* URL must be escaped. The same criterion must be used in strlen_url()
* and strcpy_url().
*/
static bool urlchar_needs_escaping(int c)
{
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
/*
* strlen_url() returns the length of the given URL if the spaces within the
* URL were properly URL encoded.
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*/
static size_t strlen_url(const char *url, bool relative)
{
const unsigned char *ptr;
size_t newlen = 0;
bool left = TRUE; /* left side of the ? */
const unsigned char *host_sep = (const unsigned char *) url;
if(!relative)
host_sep = (const unsigned char *) find_host_sep(url);
for(ptr = (unsigned char *)url; *ptr; ptr++) {
if(ptr < host_sep) {
++newlen;
continue;
}
switch(*ptr) {
case '?':
left = FALSE;
/* FALLTHROUGH */
default:
if(urlchar_needs_escaping(*ptr))
newlen += 2;
newlen++;
break;
case ' ':
if(left)
newlen += 3;
else
newlen++;
break;
}
}
return newlen;
}
/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in
* the source URL accordingly.
* URL encoding should be skipped for host names, otherwise IDN resolution
* will fail.
*/
static void strcpy_url(char *output, const char *url, bool relative)
{
/* we must add this with whitespace-replacing */
bool left = TRUE;
const unsigned char *iptr;
char *optr = output;
const unsigned char *host_sep = (const unsigned char *) url;
if(!relative)
host_sep = (const unsigned char *) find_host_sep(url);
for(iptr = (unsigned char *)url; /* read from here */
*iptr; /* until zero byte */
iptr++) {
if(iptr < host_sep) {
*optr++ = *iptr;
continue;
}
switch(*iptr) {
case '?':
left = FALSE;
/* FALLTHROUGH */
default:
if(urlchar_needs_escaping(*iptr)) {
snprintf(optr, 4, "%%%02x", *iptr);
optr += 3;
}
else
*optr++=*iptr;
break;
case ' ':
if(left) {
*optr++='%'; /* add a '%' */
*optr++='2'; /* add a '2' */
*optr++='0'; /* add a '0' */
}
else
*optr++='+'; /* add a '+' here */
break;
}
}
*optr = 0; /* zero terminate output buffer */
}
/*
* Returns true if the given URL is absolute (as opposed to relative)
*/
static bool is_absolute_url(const char *url)
{
char prot[16]; /* URL protocol string storage */
char letter; /* used for a silly sscanf */
return (2 == sscanf(url, "%15[^?&/:]://%c", prot, &letter)) ? TRUE : FALSE;
}
/*
* Concatenate a relative URL to a base URL making it absolute.
* URL-encodes any spaces.
* The returned pointer must be freed by the caller unless NULL
* (returns NULL on out of memory).
*/
static char *concat_url(const char *base, const char *relurl)
{
/***
TRY to append this new path to the old URL
to the right of the host part. Oh crap, this is doomed to cause
problems in the future...
*/
char *newest;
char *protsep;
char *pathsep;
size_t newlen;
bool host_changed = FALSE;
const char *useurl = relurl;
size_t urllen;
/* we must make our own copy of the URL to play with, as it may
point to read-only data */
char *url_clone = strdup(base);
if(!url_clone)
return NULL; /* skip out of this NOW */
/* protsep points to the start of the host name */
protsep = strstr(url_clone, "//");
if(!protsep)
protsep = url_clone;
else
protsep += 2; /* pass the slashes */
if('/' != relurl[0]) {
int level = 0;
/* First we need to find out if there's a ?-letter in the URL,
and cut it and the right-side of that off */
pathsep = strchr(protsep, '?');
if(pathsep)
*pathsep = 0;
/* we have a relative path to append to the last slash if there's one
available, or if the new URL is just a query string (starts with a
'?') we append the new one at the end of the entire currently worked
out URL */
if(useurl[0] != '?') {
pathsep = strrchr(protsep, '/');
if(pathsep)
*pathsep = 0;
}
/* Check if there's any slash after the host name, and if so, remember
that position instead */
pathsep = strchr(protsep, '/');
if(pathsep)
protsep = pathsep + 1;
else
protsep = NULL;
/* now deal with one "./" or any amount of "../" in the newurl
and act accordingly */
if((useurl[0] == '.') && (useurl[1] == '/'))
useurl += 2; /* just skip the "./" */
while((useurl[0] == '.') &&
(useurl[1] == '.') &&
(useurl[2] == '/')) {
level++;
useurl += 3; /* pass the "../" */
}
if(protsep) {
while(level--) {
/* cut off one more level from the right of the original URL */
pathsep = strrchr(protsep, '/');
if(pathsep)
*pathsep = 0;
else {
*protsep = 0;
break;
}
}
}
}
else {
/* We got a new absolute path for this server */
if((relurl[0] == '/') && (relurl[1] == '/')) {
/* the new URL starts with //, just keep the protocol part from the
original one */
*protsep = 0;
useurl = &relurl[2]; /* we keep the slashes from the original, so we
skip the new ones */
host_changed = TRUE;
}
else {
/* cut off the original URL from the first slash, or deal with URLs
without slash */
pathsep = strchr(protsep, '/');
if(pathsep) {
/* When people use badly formatted URLs, such as
"http://www.url.com?dir=/home/daniel" we must not use the first
slash, if there's a ?-letter before it! */
char *sep = strchr(protsep, '?');
if(sep && (sep < pathsep))
pathsep = sep;
*pathsep = 0;
}
else {
/* There was no slash. Now, since we might be operating on a badly
formatted URL, such as "http://www.url.com?id=2380" which doesn't
use a slash separator as it is supposed to, we need to check for a
?-letter as well! */
pathsep = strchr(protsep, '?');
if(pathsep)
*pathsep = 0;
}
}
}
/* If the new part contains a space, this is a mighty stupid redirect
but we still make an effort to do "right". To the left of a '?'
letter we replace each space with %20 while it is replaced with '+'
on the right side of the '?' letter.
*/
newlen = strlen_url(useurl, !host_changed);
urllen = strlen(url_clone);
newest = malloc(urllen + 1 + /* possible slash */
newlen + 1 /* zero byte */);
if(!newest) {
free(url_clone); /* don't leak this */
return NULL;
}
/* copy over the root url part */
memcpy(newest, url_clone, urllen);
/* check if we need to append a slash */
if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
;
else
newest[urllen++]='/';
/* then append the new piece on the right side */
strcpy_url(&newest[urllen], useurl, !host_changed);
free(url_clone);
return newest;
}
#endif /* CURL_DISABLE_HTTP */
/*
* Curl_follow() handles the URL redirect magic. Pass in the 'newurl' string
* as given by the remote server and set up the new URL to request.
@ -1810,12 +1506,12 @@ CURLcode Curl_follow(struct Curl_easy *data,
}
}
if(!is_absolute_url(newurl)) {
if(!Curl_is_absolute_url(newurl, NULL, 8)) {
/***
*DANG* this is an RFC 2068 violation. The URL is supposed
to be absolute and this doesn't seem to be that!
*/
char *absolute = concat_url(data->change.url, newurl);
char *absolute = Curl_concat_url(data->change.url, newurl);
if(!absolute)
return CURLE_OUT_OF_MEMORY;
newurl = absolute;
@ -1824,7 +1520,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
/* The new URL MAY contain space or high byte values, that means a mighty
stupid redirect URL but we still make an effort to do "right". */
char *newest;
size_t newlen = strlen_url(newurl, FALSE);
size_t newlen = Curl_strlen_url(newurl, FALSE);
/* This is an absolute URL, don't allow the custom port number */
disallowport = TRUE;
@ -1833,7 +1529,7 @@ CURLcode Curl_follow(struct Curl_easy *data,
if(!newest)
return CURLE_OUT_OF_MEMORY;
strcpy_url(newest, newurl, FALSE); /* create a space-free URL */
Curl_strcpy_url(newest, newurl, FALSE); /* create a space-free URL */
newurl = newest; /* use this instead now */
}

View file

@ -1944,30 +1944,37 @@ static struct connectdata *allocate_conn(struct Curl_easy *data)
return NULL;
}
/* returns the handdler if the given scheme is built-in */
const struct Curl_handler *Curl_builtin_scheme(const char *scheme)
{
const struct Curl_handler * const *pp;
const struct Curl_handler *p;
/* Scan protocol handler table and match against 'scheme'. The handler may
be changed later when the protocol specific setup function is called. */
for(pp = protocols; (p = *pp) != NULL; pp++)
if(strcasecompare(p->scheme, scheme))
/* Protocol found in table. Check if allowed */
return p;
return NULL; /* not found */
}
static CURLcode findprotocol(struct Curl_easy *data,
struct connectdata *conn,
const char *protostr)
{
const struct Curl_handler * const *pp;
const struct Curl_handler *p;
const struct Curl_handler *p = Curl_builtin_scheme(protostr);
/* Scan protocol handler table and match against 'protostr' to set a few
variables based on the URL. Now that the handler may be changed later
when the protocol specific setup function is called. */
for(pp = protocols; (p = *pp) != NULL; pp++) {
if(strcasecompare(p->scheme, protostr)) {
/* Protocol found in table. Check if allowed */
if(!(data->set.allowed_protocols & p->protocol))
/* nope, get out */
break;
/* it is allowed for "normal" request, now do an extra check if this is
the result of a redirect */
if(data->state.this_is_a_follow &&
!(data->set.redir_protocols & p->protocol))
/* nope, get out */
break;
if(p && /* Protocol found in table. Check if allowed */
(data->set.allowed_protocols & p->protocol)) {
/* it is allowed for "normal" request, now do an extra check if this is
the result of a redirect */
if(data->state.this_is_a_follow &&
!(data->set.redir_protocols & p->protocol))
/* nope, get out */
;
else {
/* Perform setup complement if some. */
conn->handler = conn->given = p;
@ -1976,7 +1983,6 @@ static CURLcode findprotocol(struct Curl_easy *data,
}
}
/* The protocol was not found in the table, but we don't have to assign it
to anything since it is already assigned to a dummy-struct in the
create_conn() function when the connectdata struct is allocated. */

View file

@ -79,6 +79,8 @@ void Curl_getoff_all_pipelines(struct Curl_easy *data,
CURLcode Curl_upkeep(struct conncache *conn_cache, void *data);
const struct Curl_handler *Curl_builtin_scheme(const char *scheme);
#define CURL_DEFAULT_PROXY_PORT 1080 /* default proxy port unless specified */
#define CURL_DEFAULT_HTTPS_PROXY_PORT 443 /* default https proxy port unless
specified */

29
lib/urlapi-int.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef HEADER_CURL_URLAPI_INT_H
#define HEADER_CURL_URLAPI_INT_H
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
#include "curl_setup.h"
bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen);
char *Curl_concat_url(const char *base, const char *relurl);
size_t Curl_strlen_url(const char *url, bool relative);
void Curl_strcpy_url(char *output, const char *url, bool relative);
#endif

1315
lib/urlapi.c Normal file

File diff suppressed because it is too large Load diff