aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/curl/lib/urlapi.c
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2024-01-18 11:28:56 +0100
committerGitHub <noreply@github.com>2024-01-18 11:28:56 +0100
commit9d0a3761b3201e0d9db879a7adf91876ebdb0564 (patch)
tree541d11ac878c18efd7ebca81e35112aa0fef995b /contrib/libs/curl/lib/urlapi.c
parent404ef8886ecc9736bc58ade6da2fbd83b486a408 (diff)
downloadydb-9d0a3761b3201e0d9db879a7adf91876ebdb0564.tar.gz
Library import 8 (#1074)
* Library import 8 * Add contrib/libs/cxxsupp/libcxx/include/__verbose_abort
Diffstat (limited to 'contrib/libs/curl/lib/urlapi.c')
-rw-r--r--contrib/libs/curl/lib/urlapi.c987
1 files changed, 527 insertions, 460 deletions
diff --git a/contrib/libs/curl/lib/urlapi.c b/contrib/libs/curl/lib/urlapi.c
index 7dac81c85c..0d11e48c92 100644
--- a/contrib/libs/curl/lib/urlapi.c
+++ b/contrib/libs/curl/lib/urlapi.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -33,6 +33,8 @@
#include "inet_pton.h"
#include "inet_ntop.h"
#include "strdup.h"
+#include "idn.h"
+#include "curl_memrchr.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
@@ -56,6 +58,15 @@
/* scheme is not URL encoded, the longest libcurl supported ones are... */
#define MAX_SCHEME_LEN 40
+/*
+ * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
+ * sure we have _some_ value for AF_INET6 without polluting our fake value
+ * everywhere.
+ */
+#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
+#define AF_INET6 (AF_INET + 1)
+#endif
+
/* Internal representation of CURLU. Point to URL-encoded strings. */
struct Curl_URL {
char *scheme;
@@ -89,7 +100,7 @@ static void free_urlhandle(struct Curl_URL *u)
/*
* Find the separator at the end of the host name, or the '?' in cases like
- * http://www.url.com?id=2380
+ * http://www.example.com?id=2380
*/
static const char *find_host_sep(const char *url)
{
@@ -116,14 +127,11 @@ static const char *find_host_sep(const char *url)
}
/*
- * Decide in an encoding-independent manner whether a character in a URL must
- * be escaped. This is used in urlencode_str().
+ * Decide whether a character in a URL must be escaped.
*/
-static bool urlchar_needs_escaping(int c)
-{
- return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
-}
+#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
+static const char hexdigits[] = "0123456789abcdef";
/* urlencode_str() writes data into an output dynbuf and URL-encodes the
* spaces in the source URL accordingly.
*
@@ -167,7 +175,10 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
left = FALSE;
if(urlchar_needs_escaping(*iptr)) {
- if(Curl_dyn_addf(o, "%%%02x", *iptr))
+ char out[3]={'%'};
+ out[1] = hexdigits[*iptr>>4];
+ out[2] = hexdigits[*iptr & 0xf];
+ if(Curl_dyn_addn(o, out, 3))
return CURLUE_OUT_OF_MEMORY;
}
else {
@@ -190,26 +201,27 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
bool guess_scheme)
{
- int i;
+ int i = 0;
DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
(void)buflen; /* only used in debug-builds */
if(buf)
buf[0] = 0; /* always leave a defined value in buf */
-#ifdef WIN32
+#ifdef _WIN32
if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
return 0;
#endif
- for(i = 0; i < MAX_SCHEME_LEN; ++i) {
- char s = url[i];
- if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
- /* RFC 3986 3.1 explains:
- scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- */
- }
- else {
- break;
+ if(ISALPHA(url[0]))
+ for(i = 1; i < MAX_SCHEME_LEN; ++i) {
+ char s = url[i];
+ if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
+ /* RFC 3986 3.1 explains:
+ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ */
+ }
+ else {
+ break;
+ }
}
- }
if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
/* If this does not guess scheme, the scheme always ends with the colon so
that this also detects data: URLs etc. In guessing mode, data: could
@@ -326,7 +338,7 @@ static char *concat_url(char *base, const char *relurl)
pathsep = strchr(protsep, '/');
if(pathsep) {
/* When people use badly formatted URLs, such as
- "http://www.url.com?dir=/home/daniel" we must not use the first
+ "http://www.example.com?dir=/home/daniel" we must not use the first
slash, if there's a ?-letter before it! */
char *sep = strchr(protsep, '?');
if(sep && (sep < pathsep))
@@ -335,9 +347,9 @@ static char *concat_url(char *base, const char *relurl)
}
else {
/* There was no slash. Now, since we might be operating on a badly
- formatted URL, such as "http://www.url.com?id=2380" which doesn't
- use a slash separator as it is supposed to, we need to check for a
- ?-letter as well! */
+ formatted URL, such as "http://www.example.com?id=2380" which
+ doesn't use a slash separator as it is supposed to, we need to check
+ for a ?-letter as well! */
pathsep = strchr(protsep, '?');
if(pathsep)
*pathsep = 0;
@@ -365,27 +377,30 @@ static char *concat_url(char *base, const char *relurl)
return Curl_dyn_ptr(&newest);
}
-/* scan for byte values < 31 or 127 */
-static bool junkscan(const char *part, unsigned int flags)
+/* scan for byte values <= 31, 127 and sometimes space */
+static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
{
- if(part) {
- static const char badbytes[]={
- /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
- 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
- 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x7f, 0x00 /* null-terminate */
- };
- size_t n = strlen(part);
- size_t nfine = strcspn(part, badbytes);
- if(nfine != n)
- /* since we don't know which part is scanned, return a generic error
- code */
- return TRUE;
- if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
- return TRUE;
- }
- return FALSE;
+ static const char badbytes[]={
+ /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x7f, 0x00 /* null-terminate */
+ };
+ size_t n = strlen(url);
+ size_t nfine;
+
+ if(n > CURL_MAX_INPUT_LENGTH)
+ /* excessive input length */
+ return CURLUE_MALFORMED_INPUT;
+
+ nfine = strcspn(url, badbytes);
+ if((nfine != n) ||
+ (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
+ return CURLUE_MALFORMED_INPUT;
+
+ *urllen = n;
+ return CURLUE_OK;
}
/*
@@ -396,8 +411,10 @@ static bool junkscan(const char *part, unsigned int flags)
*
*/
static CURLUcode parse_hostname_login(struct Curl_URL *u,
- struct dynbuf *host,
- unsigned int flags)
+ const char *login,
+ size_t len,
+ unsigned int flags,
+ size_t *offset) /* to the host name */
{
CURLUcode result = CURLUE_OK;
CURLcode ccode;
@@ -413,13 +430,12 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
*
* We need somewhere to put the embedded details, so do that first.
*/
-
- char *login = Curl_dyn_ptr(host);
char *ptr;
DEBUGASSERT(login);
- ptr = strchr(login, '@');
+ *offset = 0;
+ ptr = memchr(login, '@', len);
if(!ptr)
goto out;
@@ -430,7 +446,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
/* if this is a known scheme, get some details */
if(u->scheme)
- h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+ h = Curl_get_scheme_handler(u->scheme);
/* We could use the login information in the URL so extract it. Only parse
options if the handler says we should. Note that 'h' might be NULL! */
@@ -449,35 +465,25 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
result = CURLUE_USER_NOT_ALLOWED;
goto out;
}
- if(junkscan(userp, flags)) {
- result = CURLUE_BAD_USER;
- goto out;
- }
+ free(u->user);
u->user = userp;
}
if(passwdp) {
- if(junkscan(passwdp, flags)) {
- result = CURLUE_BAD_PASSWORD;
- goto out;
- }
+ free(u->password);
u->password = passwdp;
}
if(optionsp) {
- if(junkscan(optionsp, flags)) {
- result = CURLUE_BAD_LOGIN;
- goto out;
- }
+ free(u->options);
u->options = optionsp;
}
- /* move the name to the start of the host buffer */
- if(Curl_dyn_tail(host, strlen(ptr)))
- return CURLUE_OUT_OF_MEMORY;
-
+ /* the host name starts at this offset */
+ *offset = ptr - login;
return CURLUE_OK;
- out:
+
+out:
free(userp);
free(passwdp);
@@ -492,35 +498,20 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
bool has_scheme)
{
- char *portptr = NULL;
- char endbracket;
- int len;
+ char *portptr;
char *hostname = Curl_dyn_ptr(host);
/*
- * Find the end of an IPv6 address, either on the ']' ending bracket or
- * a percent-encoded zone index.
+ * Find the end of an IPv6 address on the ']' ending bracket.
*/
- if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
- &endbracket, &len)) {
- if(']' == endbracket)
- portptr = &hostname[len];
- else if('%' == endbracket) {
- int zonelen = len;
- if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
- if(']' != endbracket)
- return CURLUE_BAD_IPV6;
- portptr = &hostname[--zonelen + len + 1];
- }
- else
- return CURLUE_BAD_IPV6;
- }
- else
+ if(hostname[0] == '[') {
+ portptr = strchr(hostname, ']');
+ if(!portptr)
return CURLUE_BAD_IPV6;
-
+ portptr++;
/* this is a RFC2732-style specified IP-address */
- if(portptr && *portptr) {
+ if(*portptr) {
if(*portptr != ':')
- return CURLUE_BAD_IPV6;
+ return CURLUE_BAD_PORT_NUMBER;
}
else
portptr = NULL;
@@ -531,7 +522,6 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
if(portptr) {
char *rest;
long port;
- char portbuf[7];
size_t keep = portptr - hostname;
/* Browser behavior adaptation. If there's a colon with no digits after,
@@ -557,11 +547,10 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
if(rest[0])
return CURLUE_BAD_PORT_NUMBER;
- *rest = 0;
- /* generate a new port number string to get rid of leading zeroes etc */
- msnprintf(portbuf, sizeof(portbuf), "%ld", port);
u->portnum = port;
- u->port = strdup(portbuf);
+ /* generate a new port number string to get rid of leading zeroes etc */
+ free(u->port);
+ u->port = aprintf("%ld", port);
if(!u->port)
return CURLUE_OUT_OF_MEMORY;
}
@@ -569,74 +558,79 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
return CURLUE_OK;
}
-static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
- size_t hlen) /* length of hostname */
+/* this assumes 'hostname' now starts with [ */
+static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
+ size_t hlen) /* length of hostname */
{
size_t len;
- DEBUGASSERT(hostname);
-
- if(!hostname[0])
- return CURLUE_NO_HOST;
- else if(hostname[0] == '[') {
- const char *l = "0123456789abcdefABCDEF:.";
- if(hlen < 4) /* '[::]' is the shortest possible valid string */
+ DEBUGASSERT(*hostname == '[');
+ if(hlen < 4) /* '[::]' is the shortest possible valid string */
+ return CURLUE_BAD_IPV6;
+ hostname++;
+ hlen -= 2;
+
+ /* only valid IPv6 letters are ok */
+ len = strspn(hostname, "0123456789abcdefABCDEF:.");
+
+ if(hlen != len) {
+ hlen = len;
+ if(hostname[len] == '%') {
+ /* this could now be '%[zone id]' */
+ char zoneid[16];
+ int i = 0;
+ char *h = &hostname[len + 1];
+ /* pass '25' if present and is a url encoded percent sign */
+ if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
+ h += 2;
+ while(*h && (*h != ']') && (i < 15))
+ zoneid[i++] = *h++;
+ if(!i || (']' != *h))
+ return CURLUE_BAD_IPV6;
+ zoneid[i] = 0;
+ u->zoneid = strdup(zoneid);
+ if(!u->zoneid)
+ return CURLUE_OUT_OF_MEMORY;
+ hostname[len] = ']'; /* insert end bracket */
+ hostname[len + 1] = 0; /* terminate the hostname */
+ }
+ else
return CURLUE_BAD_IPV6;
- hostname++;
- hlen -= 2;
+ /* hostname is fine */
+ }
- if(hostname[hlen] != ']')
+ /* Check the IPv6 address. */
+ {
+ char dest[16]; /* fits a binary IPv6 address */
+ char norm[MAX_IPADR_LEN];
+ hostname[hlen] = 0; /* end the address there */
+ if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
return CURLUE_BAD_IPV6;
- /* only valid letters are ok */
- len = strspn(hostname, l);
- if(hlen != len) {
- hlen = len;
- if(hostname[len] == '%') {
- /* this could now be '%[zone id]' */
- char zoneid[16];
- int i = 0;
- char *h = &hostname[len + 1];
- /* pass '25' if present and is a url encoded percent sign */
- if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
- h += 2;
- while(*h && (*h != ']') && (i < 15))
- zoneid[i++] = *h++;
- if(!i || (']' != *h))
- /* impossible to reach? */
- return CURLUE_MALFORMED_INPUT;
- zoneid[i] = 0;
- u->zoneid = strdup(zoneid);
- if(!u->zoneid)
- return CURLUE_OUT_OF_MEMORY;
- hostname[len] = ']'; /* insert end bracket */
- hostname[len + 1] = 0; /* terminate the hostname */
- }
- else
- return CURLUE_BAD_IPV6;
- /* hostname is fine */
- }
-#ifdef ENABLE_IPV6
- {
- char dest[16]; /* fits a binary IPv6 address */
- char norm[MAX_IPADR_LEN];
- hostname[hlen] = 0; /* end the address there */
- if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
- return CURLUE_BAD_IPV6;
-
- /* check if it can be done shorter */
- if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
- (strlen(norm) < hlen)) {
- strcpy(hostname, norm);
- hlen = strlen(norm);
- hostname[hlen + 1] = 0;
- }
- hostname[hlen] = ']'; /* restore ending bracket */
+ /* check if it can be done shorter */
+ if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
+ (strlen(norm) < hlen)) {
+ strcpy(hostname, norm);
+ hlen = strlen(norm);
+ hostname[hlen + 1] = 0;
}
-#endif
+ hostname[hlen] = ']'; /* restore ending bracket */
}
+ return CURLUE_OK;
+}
+
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+ size_t hlen) /* length of hostname */
+{
+ size_t len;
+ DEBUGASSERT(hostname);
+
+ if(!hlen)
+ return CURLUE_NO_HOST;
+ else if(hostname[0] == '[')
+ return ipv6_parse(u, hostname, hlen);
else {
/* letters from the second string are not ok */
- len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,");
+ len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
if(hlen != len)
/* hostname with bad content */
return CURLUE_BAD_HOSTNAME;
@@ -644,50 +638,52 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
return CURLUE_OK;
}
-#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
-
/*
* Handle partial IPv4 numerical addresses and different bases, like
* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
*
- * If the given input string is syntactically wrong or any part for example is
- * too big, this function returns FALSE and doesn't create any output.
+ * If the given input string is syntactically wrong IPv4 or any part for
+ * example is too big, this function returns HOST_NAME.
*
* Output the "normalized" version of that input string in plain quad decimal
- * integers and return TRUE.
+ * integers.
+ *
+ * Returns the host type.
*/
-static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
+
+#define HOST_ERROR -1 /* out of memory */
+#define HOST_BAD -2 /* bad IPv4 address */
+
+#define HOST_NAME 1
+#define HOST_IPV4 2
+#define HOST_IPV6 3
+
+static int ipv4_normalize(struct dynbuf *host)
{
bool done = FALSE;
int n = 0;
- const char *c = hostname;
+ const char *c = Curl_dyn_ptr(host);
unsigned long parts[4] = {0, 0, 0, 0};
+ CURLcode result = CURLE_OK;
+
+ if(*c == '[')
+ return HOST_IPV6;
while(!done) {
char *endp;
unsigned long l;
- if((*c < '0') || (*c > '9'))
+ if(!ISDIGIT(*c))
/* most importantly this doesn't allow a leading plus or minus */
- return FALSE;
+ return HOST_NAME;
l = strtoul(c, &endp, 0);
- /* overflow or nothing parsed at all */
- if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
- return FALSE;
-
-#if SIZEOF_LONG > 4
- /* a value larger than 32 bits */
- if(l > UINT_MAX)
- return FALSE;
-#endif
-
parts[n] = l;
c = endp;
- switch (*c) {
- case '.' :
+ switch(*c) {
+ case '.':
if(n == 3)
- return FALSE;
+ return HOST_NAME;
n++;
c++;
break;
@@ -697,51 +693,63 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
break;
default:
- return FALSE;
+ return HOST_NAME;
}
- }
- /* this is deemed a valid IPv4 numerical address */
+ /* overflow */
+ if((l == ULONG_MAX) && (errno == ERANGE))
+ return HOST_NAME;
+
+#if SIZEOF_LONG > 4
+ /* a value larger than 32 bits */
+ if(l > UINT_MAX)
+ return HOST_NAME;
+#endif
+ }
switch(n) {
case 0: /* a -- 32 bits */
- msnprintf(outp, olen, "%u.%u.%u.%u",
- parts[0] >> 24, (parts[0] >> 16) & 0xff,
- (parts[0] >> 8) & 0xff, parts[0] & 0xff);
+ Curl_dyn_reset(host);
+
+ result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+ parts[0] >> 24, (parts[0] >> 16) & 0xff,
+ (parts[0] >> 8) & 0xff, parts[0] & 0xff);
break;
case 1: /* a.b -- 8.24 bits */
if((parts[0] > 0xff) || (parts[1] > 0xffffff))
- return FALSE;
- msnprintf(outp, olen, "%u.%u.%u.%u",
- parts[0], (parts[1] >> 16) & 0xff,
- (parts[1] >> 8) & 0xff, parts[1] & 0xff);
+ return HOST_NAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+ parts[0], (parts[1] >> 16) & 0xff,
+ (parts[1] >> 8) & 0xff, parts[1] & 0xff);
break;
case 2: /* a.b.c -- 8.8.16 bits */
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
- return FALSE;
- msnprintf(outp, olen, "%u.%u.%u.%u",
- parts[0], parts[1], (parts[2] >> 8) & 0xff,
- parts[2] & 0xff);
+ return HOST_NAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+ parts[0], parts[1], (parts[2] >> 8) & 0xff,
+ parts[2] & 0xff);
break;
case 3: /* a.b.c.d -- 8.8.8.8 bits */
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
(parts[3] > 0xff))
- return FALSE;
- msnprintf(outp, olen, "%u.%u.%u.%u",
- parts[0], parts[1], parts[2], parts[3]);
+ return HOST_NAME;
+ Curl_dyn_reset(host);
+ result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+ parts[0], parts[1], parts[2], parts[3]);
break;
}
- return TRUE;
+ if(result)
+ return HOST_ERROR;
+ return HOST_IPV4;
}
/* if necessary, replace the host content with a URL decoded version */
-static CURLUcode decode_host(struct dynbuf *host)
+static CURLUcode urldecode_host(struct dynbuf *host)
{
char *per = NULL;
const char *hostname = Curl_dyn_ptr(host);
- if(hostname[0] == '[')
- /* only decode if not an ipv6 numerical */
- return CURLUE_OK;
per = strchr(hostname, '%');
if(!per)
/* nothing to decode */
@@ -764,6 +772,78 @@ static CURLUcode decode_host(struct dynbuf *host)
return CURLUE_OK;
}
+static CURLUcode parse_authority(struct Curl_URL *u,
+ const char *auth, size_t authlen,
+ unsigned int flags,
+ struct dynbuf *host,
+ bool has_scheme)
+{
+ size_t offset;
+ CURLUcode result;
+
+ /*
+ * Parse the login details and strip them out of the host name.
+ */
+ result = parse_hostname_login(u, auth, authlen, flags, &offset);
+ if(result)
+ goto out;
+
+ if(Curl_dyn_addn(host, auth + offset, authlen - offset)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto out;
+ }
+
+ result = Curl_parse_port(u, host, has_scheme);
+ if(result)
+ goto out;
+
+ if(!Curl_dyn_len(host))
+ return CURLUE_NO_HOST;
+
+ switch(ipv4_normalize(host)) {
+ case HOST_IPV4:
+ break;
+ case HOST_IPV6:
+ result = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
+ break;
+ case HOST_NAME:
+ result = urldecode_host(host);
+ if(!result)
+ result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
+ break;
+ case HOST_ERROR:
+ result = CURLUE_OUT_OF_MEMORY;
+ break;
+ case HOST_BAD:
+ default:
+ result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
+ break;
+ }
+
+out:
+ return result;
+}
+
+CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
+ unsigned int flags)
+{
+ CURLUcode result;
+ struct dynbuf host;
+
+ DEBUGASSERT(authority);
+ Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
+ result = parse_authority(u, authority, strlen(authority), flags,
+ &host, !!u->scheme);
+ if(result)
+ Curl_dyn_free(&host);
+ else {
+ free(u->host);
+ u->host = Curl_dyn_ptr(&host);
+ }
+ return result;
+}
+
/*
* "Remove Dot Segments"
* https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
@@ -782,32 +862,27 @@ static CURLUcode decode_host(struct dynbuf *host)
*
* RETURNS
*
- * an allocated dedotdotified output string
+ * Zero for success and 'out' set to an allocated dedotdotified string.
*/
-UNITTEST char *dedotdotify(const char *input, size_t clen);
-UNITTEST char *dedotdotify(const char *input, size_t clen)
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
- char *out = malloc(clen + 1);
char *outptr;
- const char *orginput = input;
- char *queryp;
+ const char *endp = &input[clen];
+ char *out;
+
+ *outp = NULL;
+ /* the path always starts with a slash, and a slash has not dot */
+ if((clen < 2) || !memchr(input, '.', clen))
+ return 0;
+
+ out = malloc(clen + 1);
if(!out)
- return NULL; /* out of memory */
+ return 1; /* out of memory */
*out = 0; /* null-terminates, for inputs like "./" */
outptr = out;
- if(!*input)
- /* zero length input string, return that */
- return out;
-
- /*
- * To handle query-parts properly, we must find it and remove it during the
- * dotdot-operation and then append it again at the end to the output
- * string.
- */
- queryp = strchr(input, '?');
-
do {
bool dotdot = TRUE;
if(*input == '.') {
@@ -893,30 +968,20 @@ UNITTEST char *dedotdotify(const char *input, size_t clen)
*outptr = 0;
}
- /* continue until end of input string OR, if there is a terminating
- query part, stop there */
- } while(*input && (!queryp || (input < queryp)));
-
- if(queryp) {
- size_t qlen;
- /* There was a query part, append that to the output. */
- size_t oindex = queryp - orginput;
- qlen = strlen(&orginput[oindex]);
- memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
- }
+ /* continue until end of path */
+ } while(input < endp);
- return out;
+ *outp = out;
+ return 0; /* success */
}
static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
{
const char *path;
size_t pathlen;
- bool uncpath = FALSE;
char *query = NULL;
char *fragment = NULL;
char schemebuf[MAX_SCHEME_LEN + 1];
- const char *schemep = NULL;
size_t schemelen = 0;
size_t urllen;
CURLUcode result = CURLUE_OK;
@@ -927,16 +992,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
- /*************************************************************
- * Parse the URL.
- ************************************************************/
- /* allocate scratch area */
- urllen = strlen(url);
- if(urllen > CURL_MAX_INPUT_LENGTH) {
- /* excessive input length */
- result = CURLUE_MALFORMED_INPUT;
+ result = junkscan(url, &urllen, flags);
+ if(result)
goto fail;
- }
schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
flags & (CURLU_GUESS_SCHEME|
@@ -944,6 +1002,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
/* handle the file: scheme */
if(schemelen && !strcmp(schemebuf, "file")) {
+ bool uncpath = FALSE;
if(urllen <= 6) {
/* file:/ is not enough to actually be a complete file: URL */
result = CURLUE_BAD_FILE_URL;
@@ -952,8 +1011,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
/* path has been allocated large enough to hold this */
path = (char *)&url[5];
+ pathlen = urllen - 5;
- schemep = u->scheme = strdup("file");
+ u->scheme = strdup("file");
if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
@@ -996,7 +1056,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
ptr += 9; /* now points to the slash after the host */
}
else {
-#if defined(WIN32)
+#if defined(_WIN32)
size_t len;
/* the host name, NetBIOS computer name, can not contain disallowed
@@ -1028,13 +1088,14 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
}
path = ptr;
+ pathlen = urllen - (ptr - url);
}
if(!uncpath)
/* no host for file: URLs by default */
Curl_dyn_reset(&host);
-#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
+#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
/* Don't allow Windows drive letters when not in Windows.
* This catches both "file:/c:" and "file:c:" */
if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
@@ -1048,40 +1109,38 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
/* This cannot be done with strcpy, as the memory chunks overlap! */
path++;
+ pathlen--;
}
#endif
}
else {
/* clear path */
- const char *p;
+ const char *schemep = NULL;
const char *hostp;
- size_t len;
+ size_t hostlen;
if(schemelen) {
int i = 0;
- p = &url[schemelen + 1];
- while(p && (*p == '/') && (i < 4)) {
+ const char *p = &url[schemelen + 1];
+ while((*p == '/') && (i < 4)) {
p++;
i++;
}
schemep = schemebuf;
- if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
+ if(!Curl_get_scheme_handler(schemep) &&
!(flags & CURLU_NON_SUPPORT_SCHEME)) {
result = CURLUE_UNSUPPORTED_SCHEME;
goto fail;
}
- if((i < 1) || (i>3)) {
+ if((i < 1) || (i > 3)) {
/* less than one or more than three slashes */
result = CURLUE_BAD_SLASHES;
goto fail;
}
- if(junkscan(schemep, flags)) {
- result = CURLUE_BAD_SCHEME;
- goto fail;
- }
+ hostp = p; /* host name starts here */
}
else {
/* no scheme! */
@@ -1096,63 +1155,101 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
/*
* The URL was badly formatted, let's try without scheme specified.
*/
- p = url;
+ hostp = url;
}
- hostp = p; /* host name starts here */
-
- /* find the end of the host name + port number */
- while(*p && !HOSTNAME_END(*p))
- p++;
- len = p - hostp;
- if(len) {
- if(Curl_dyn_addn(&host, hostp, len)) {
+ if(schemep) {
+ u->scheme = strdup(schemep);
+ if(!u->scheme) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
- else {
- if(!(flags & CURLU_NO_AUTHORITY)) {
- result = CURLUE_NO_HOST;
+
+ /* find the end of the host name + port number */
+ hostlen = strcspn(hostp, "/?#");
+ path = &hostp[hostlen];
+
+ /* this pathlen also contains the query and the fragment */
+ pathlen = urllen - (path - url);
+ if(hostlen) {
+
+ result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
+ if(result)
goto fail;
- }
- }
- path = (char *)p;
+ if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+ const char *hostname = Curl_dyn_ptr(&host);
+ /* legacy curl-style guess based on host name */
+ if(checkprefix("ftp.", hostname))
+ schemep = "ftp";
+ else if(checkprefix("dict.", hostname))
+ schemep = "dict";
+ else if(checkprefix("ldap.", hostname))
+ schemep = "ldap";
+ else if(checkprefix("imap.", hostname))
+ schemep = "imap";
+ else if(checkprefix("smtp.", hostname))
+ schemep = "smtp";
+ else if(checkprefix("pop3.", hostname))
+ schemep = "pop3";
+ else
+ schemep = "http";
- if(schemep) {
- u->scheme = strdup(schemep);
- if(!u->scheme) {
+ u->scheme = strdup(schemep);
+ if(!u->scheme) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ }
+ }
+ else if(flags & CURLU_NO_AUTHORITY) {
+ /* allowed to be empty. */
+ if(Curl_dyn_add(&host, "")) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
}
+ else {
+ result = CURLUE_NO_HOST;
+ goto fail;
+ }
}
fragment = strchr(path, '#');
if(fragment) {
- fraglen = strlen(fragment);
+ fraglen = pathlen - (fragment - path);
if(fraglen > 1) {
/* skip the leading '#' in the copy but include the terminating null */
- u->fragment = Curl_memdup(fragment + 1, fraglen);
- if(!u->fragment) {
- result = CURLUE_OUT_OF_MEMORY;
- goto fail;
+ if(flags & CURLU_URLENCODE) {
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ if(urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE)) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
+ u->fragment = Curl_dyn_ptr(&enc);
}
-
- if(junkscan(u->fragment, flags)) {
- result = CURLUE_BAD_FRAGMENT;
- goto fail;
+ else {
+ u->fragment = Curl_strndup(fragment + 1, fraglen - 1);
+ if(!u->fragment) {
+ result = CURLUE_OUT_OF_MEMORY;
+ goto fail;
+ }
}
}
+ /* after this, pathlen still contains the query */
+ pathlen -= fraglen;
}
- query = strchr(path, '?');
- if(query && (!fragment || (query < fragment))) {
- size_t qlen = strlen(query) - fraglen; /* includes '?' */
- pathlen = strlen(path) - qlen - fraglen;
+ DEBUGASSERT(pathlen < urllen);
+ query = memchr(path, '?', pathlen);
+ if(query) {
+ size_t qlen = fragment ? (size_t)(fragment - query) :
+ pathlen - (query - path);
+ pathlen -= qlen;
if(qlen > 1) {
- if(qlen && (flags & CURLU_URLENCODE)) {
+ if(flags & CURLU_URLENCODE) {
struct dynbuf enc;
Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
/* skip the leading question mark */
@@ -1163,17 +1260,11 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
u->query = Curl_dyn_ptr(&enc);
}
else {
- u->query = Curl_memdup(query + 1, qlen);
+ u->query = Curl_strndup(query + 1, qlen - 1);
if(!u->query) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
- u->query[qlen - 1] = 0;
- }
-
- if(junkscan(u->query, flags)) {
- result = CURLUE_BAD_QUERY;
- goto fail;
}
}
else {
@@ -1185,8 +1276,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
}
}
}
- else
- pathlen = strlen(path) - fraglen;
if(pathlen && (flags & CURLU_URLENCODE)) {
struct dynbuf enc;
@@ -1199,111 +1288,42 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
path = u->path = Curl_dyn_ptr(&enc);
}
- if(!pathlen) {
- /* there is no path left, unset */
+ if(pathlen <= 1) {
+ /* there is no path left or just the slash, unset */
path = NULL;
}
else {
if(!u->path) {
- u->path = Curl_memdup(path, pathlen + 1);
+ u->path = Curl_strndup(path, pathlen);
if(!u->path) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
- u->path[pathlen] = 0;
path = u->path;
}
else if(flags & CURLU_URLENCODE)
/* it might have encoded more than just the path so cut it */
u->path[pathlen] = 0;
- if(junkscan(u->path, flags)) {
- result = CURLUE_BAD_PATH;
- goto fail;
- }
-
if(!(flags & CURLU_PATH_AS_IS)) {
/* remove ../ and ./ sequences according to RFC3986 */
- char *newp = dedotdotify((char *)path, pathlen);
- if(!newp) {
+ char *dedot;
+ int err = dedotdotify((char *)path, pathlen, &dedot);
+ if(err) {
result = CURLUE_OUT_OF_MEMORY;
goto fail;
}
- free(u->path);
- u->path = newp;
- }
- }
-
- if(Curl_dyn_len(&host)) {
- char normalized_ipv4[sizeof("255.255.255.255") + 1];
-
- /*
- * Parse the login details and strip them out of the host name.
- */
- result = parse_hostname_login(u, &host, flags);
- if(!result)
- result = Curl_parse_port(u, &host, schemelen);
- if(result)
- goto fail;
-
- if(junkscan(Curl_dyn_ptr(&host), flags)) {
- result = CURLUE_BAD_HOSTNAME;
- goto fail;
- }
-
- if(ipv4_normalize(Curl_dyn_ptr(&host),
- normalized_ipv4, sizeof(normalized_ipv4))) {
- Curl_dyn_reset(&host);
- if(Curl_dyn_add(&host, normalized_ipv4)) {
- result = CURLUE_OUT_OF_MEMORY;
- goto fail;
+ if(dedot) {
+ free(u->path);
+ u->path = dedot;
}
}
- else {
- result = decode_host(&host);
- if(!result)
- result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
- if(result)
- goto fail;
- }
-
- if((flags & CURLU_GUESS_SCHEME) && !schemep) {
- const char *hostname = Curl_dyn_ptr(&host);
- /* legacy curl-style guess based on host name */
- if(checkprefix("ftp.", hostname))
- schemep = "ftp";
- else if(checkprefix("dict.", hostname))
- schemep = "dict";
- else if(checkprefix("ldap.", hostname))
- schemep = "ldap";
- else if(checkprefix("imap.", hostname))
- schemep = "imap";
- else if(checkprefix("smtp.", hostname))
- schemep = "smtp";
- else if(checkprefix("pop3.", hostname))
- schemep = "pop3";
- else
- schemep = "http";
-
- u->scheme = strdup(schemep);
- if(!u->scheme) {
- result = CURLUE_OUT_OF_MEMORY;
- goto fail;
- }
- }
- }
- else if(flags & CURLU_NO_AUTHORITY) {
- /* allowed to be empty. */
- if(Curl_dyn_add(&host, "")) {
- result = CURLUE_OUT_OF_MEMORY;
- goto fail;
- }
}
u->host = Curl_dyn_ptr(&host);
return result;
- fail:
+fail:
Curl_dyn_free(&host);
free_urlhandle(u);
return result;
@@ -1330,7 +1350,7 @@ static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
*/
CURLU *curl_url(void)
{
- return calloc(sizeof(struct Curl_URL), 1);
+ return calloc(1, sizeof(struct Curl_URL));
}
void curl_url_cleanup(CURLU *u)
@@ -1350,9 +1370,9 @@ void curl_url_cleanup(CURLU *u)
} \
} while(0)
-CURLU *curl_url_dup(CURLU *in)
+CURLU *curl_url_dup(const CURLU *in)
{
- struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
+ struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
if(u) {
DUP(u, in, scheme);
DUP(u, in, user);
@@ -1363,22 +1383,25 @@ CURLU *curl_url_dup(CURLU *in)
DUP(u, in, path);
DUP(u, in, query);
DUP(u, in, fragment);
+ DUP(u, in, zoneid);
u->portnum = in->portnum;
}
return u;
- fail:
+fail:
curl_url_cleanup(u);
return NULL;
}
-CURLUcode curl_url_get(CURLU *u, CURLUPart what,
+CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
char **part, unsigned int flags)
{
- char *ptr;
+ const char *ptr;
CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
char portbuf[7];
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
+ bool punycode = FALSE;
+ bool depunyfy = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
@@ -1408,6 +1431,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
case CURLUPART_HOST:
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
+ depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
@@ -1420,8 +1445,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
/* there's no stored port number, but asked to deliver
a default one for the scheme */
- const struct Curl_handler *h =
- Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+ const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
if(h) {
msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
ptr = portbuf;
@@ -1430,8 +1454,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
else if(ptr && u->scheme) {
/* there is a stored port number, but ask to inhibit if
it matches the default one for the scheme */
- const struct Curl_handler *h =
- Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+ const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
if(h && (h->defport == u->portnum) &&
(flags & CURLU_NO_DEFAULT_PORT))
ptr = NULL;
@@ -1439,11 +1462,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
break;
case CURLUPART_PATH:
ptr = u->path;
- if(!ptr) {
- ptr = u->path = strdup("/");
- if(!u->path)
- return CURLUE_OUT_OF_MEMORY;
- }
+ if(!ptr)
+ ptr = "/";
break;
case CURLUPART_QUERY:
ptr = u->query;
@@ -1460,6 +1480,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
char *options = u->options;
char *port = u->port;
char *allochost = NULL;
+ punycode = (flags & CURLU_PUNYCODE)?1:0;
+ depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
@@ -1477,7 +1499,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
else
return CURLUE_NO_SCHEME;
- h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
+ h = Curl_get_scheme_handler(scheme);
if(!port && (flags & CURLU_DEFAULT_PORT)) {
/* there's no stored port number, but asked to deliver
a default one for the scheme */
@@ -1514,39 +1536,33 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
}
- else {
- /* only encode '%' in output host name */
- char *host = u->host;
- bool percent = FALSE;
- /* first, count number of percents present in the name */
- while(*host) {
- if(*host == '%') {
- percent = TRUE;
- break;
- }
- host++;
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ CURLcode result = Curl_idn_decode(u->host, &allochost);
+ if(result)
+ return (result == CURLE_OUT_OF_MEMORY) ?
+ CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+#endif
}
- /* if there were percent(s), encode the host name */
- if(percent) {
- struct dynbuf enc;
- CURLcode result;
- Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
- host = u->host;
- while(*host) {
- if(*host == '%')
- result = Curl_dyn_addn(&enc, "%25", 3);
- else
- result = Curl_dyn_addn(&enc, host, 1);
- if(result)
- return CURLUE_OUT_OF_MEMORY;
- host++;
- }
- free(u->host);
- u->host = Curl_dyn_ptr(&enc);
+ }
+ else if(depunyfy) {
+ if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ CURLcode result = Curl_idn_encode(u->host, &allochost);
+ if(result)
+ /* this is the most likely error */
+ return (result == CURLE_OUT_OF_MEMORY) ?
+ CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+#endif
}
}
- url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+ url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
scheme,
u->user ? u->user : "",
u->password ? ":": "",
@@ -1557,7 +1573,6 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
allochost ? allochost : u->host,
port ? ":": "",
port ? port : "",
- (u->path && (u->path[0] != '/')) ? "/": "",
u->path ? u->path : "/",
(u->query && u->query[0]) ? "?": "",
(u->query && u->query[0]) ? u->query : "",
@@ -1577,7 +1592,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
if(ptr) {
size_t partlen = strlen(ptr);
size_t i = 0;
- *part = Curl_memdup(ptr, partlen + 1);
+ *part = Curl_strndup(ptr, partlen);
if(!*part)
return CURLUE_OUT_OF_MEMORY;
if(plusdecode) {
@@ -1611,6 +1626,36 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
free(*part);
*part = Curl_dyn_ptr(&enc);
}
+ else if(punycode) {
+ if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ char *allochost;
+ CURLcode result = Curl_idn_decode(*part, &allochost);
+ if(result)
+ return (result == CURLE_OUT_OF_MEMORY) ?
+ CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+ free(*part);
+ *part = allochost;
+#endif
+ }
+ }
+ else if(depunyfy) {
+ if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
+#ifndef USE_IDN
+ return CURLUE_LACKS_IDN;
+#else
+ char *allochost;
+ CURLcode result = Curl_idn_encode(*part, &allochost);
+ if(result)
+ return (result == CURLE_OUT_OF_MEMORY) ?
+ CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+ free(*part);
+ *part = allochost;
+#endif
+ }
+ }
return CURLUE_OK;
}
@@ -1626,8 +1671,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
bool plusencode = FALSE;
bool urlskipslash = FALSE;
+ bool leadingslash = FALSE;
bool appendquery = FALSE;
bool equalsencode = FALSE;
+ size_t nalloc;
if(!u)
return CURLUE_BAD_HANDLE;
@@ -1680,18 +1727,36 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
return CURLUE_OK;
}
+ nalloc = strlen(part);
+ if(nalloc > CURL_MAX_INPUT_LENGTH)
+ /* excessive input length */
+ return CURLUE_MALFORMED_INPUT;
+
switch(what) {
- case CURLUPART_SCHEME:
- if(strlen(part) > MAX_SCHEME_LEN)
- /* too long */
+ case CURLUPART_SCHEME: {
+ size_t plen = strlen(part);
+ const char *s = part;
+ if((plen > MAX_SCHEME_LEN) || (plen < 1))
+ /* too long or too short */
return CURLUE_BAD_SCHEME;
- if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
- /* verify that it is a fine scheme */
- !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
+ /* verify that it is a fine scheme */
+ if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
return CURLUE_UNSUPPORTED_SCHEME;
storep = &u->scheme;
urlencode = FALSE; /* never */
+ if(ISALPHA(*s)) {
+ /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+ while(--plen) {
+ if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
+ s++; /* fine */
+ else
+ return CURLUE_BAD_SCHEME;
+ }
+ }
+ else
+ return CURLUE_BAD_SCHEME;
break;
+ }
case CURLUPART_USER:
storep = &u->user;
break;
@@ -1701,15 +1766,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
case CURLUPART_OPTIONS:
storep = &u->options;
break;
- case CURLUPART_HOST: {
- size_t len = strcspn(part, " \r\n");
- if(strlen(part) != len)
- /* hostname with bad content */
- return CURLUE_BAD_HOSTNAME;
+ case CURLUPART_HOST:
storep = &u->host;
Curl_safefree(u->zoneid);
break;
- }
case CURLUPART_ZONEID:
storep = &u->zoneid;
break;
@@ -1728,6 +1788,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
break;
case CURLUPART_PATH:
urlskipslash = TRUE;
+ leadingslash = TRUE; /* enforce */
storep = &u->path;
break;
case CURLUPART_QUERY:
@@ -1750,6 +1811,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
char *oldurl;
char *redired_url;
+ if(!nalloc)
+ /* a blank URL is not a valid URL */
+ return CURLUE_MALFORMED_INPUT;
+
/* if the new thing is absolute or the old one is not
* (we could not get an absolute url in 'oldurl'),
* then replace the existing with the new. */
@@ -1776,18 +1841,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
}
DEBUGASSERT(storep);
{
- const char *newp = part;
- size_t nalloc = strlen(part);
-
- if(nalloc > CURL_MAX_INPUT_LENGTH)
- /* excessive input length */
- return CURLUE_MALFORMED_INPUT;
+ const char *newp;
+ struct dynbuf enc;
+ Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
+ if(leadingslash && (part[0] != '/')) {
+ CURLcode result = Curl_dyn_addn(&enc, "/", 1);
+ if(result)
+ return CURLUE_OUT_OF_MEMORY;
+ }
if(urlencode) {
const unsigned char *i;
- struct dynbuf enc;
-
- Curl_dyn_init(&enc, nalloc * 3 + 1);
for(i = (const unsigned char *)part; *i; i++) {
CURLcode result;
@@ -1796,7 +1860,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
if(result)
return CURLUE_OUT_OF_MEMORY;
}
- else if(Curl_isunreserved(*i) ||
+ else if(ISUNRESERVED(*i) ||
((*i == '/') && urlskipslash) ||
((*i == '=') && equalsencode)) {
if((*i == '=') && equalsencode)
@@ -1807,19 +1871,21 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
return CURLUE_OUT_OF_MEMORY;
}
else {
- result = Curl_dyn_addf(&enc, "%%%02x", *i);
+ char out[3]={'%'};
+ out[1] = hexdigits[*i>>4];
+ out[2] = hexdigits[*i & 0xf];
+ result = Curl_dyn_addn(&enc, out, 3);
if(result)
return CURLUE_OUT_OF_MEMORY;
}
}
- newp = Curl_dyn_ptr(&enc);
}
else {
char *p;
- newp = strdup(part);
- if(!newp)
+ CURLcode result = Curl_dyn_add(&enc, part);
+ if(result)
return CURLUE_OUT_OF_MEMORY;
- p = (char *)newp;
+ p = Curl_dyn_ptr(&enc);
while(*p) {
/* make sure percent encoded are lower case */
if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
@@ -1832,44 +1898,45 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
p++;
}
}
+ newp = Curl_dyn_ptr(&enc);
- if(appendquery) {
+ if(appendquery && newp) {
/* Append the 'newp' string onto the old query. Add a '&' separator if
none is present at the end of the existing query already */
size_t querylen = u->query ? strlen(u->query) : 0;
bool addamperand = querylen && (u->query[querylen -1] != '&');
if(querylen) {
- struct dynbuf enc;
- Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+ struct dynbuf qbuf;
+ Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
- if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+ if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
goto nomem;
if(addamperand) {
- if(Curl_dyn_addn(&enc, "&", 1))
+ if(Curl_dyn_addn(&qbuf, "&", 1))
goto nomem;
}
- if(Curl_dyn_add(&enc, newp))
+ if(Curl_dyn_add(&qbuf, newp))
goto nomem;
- free((char *)newp);
+ Curl_dyn_free(&enc);
free(*storep);
- *storep = Curl_dyn_ptr(&enc);
+ *storep = Curl_dyn_ptr(&qbuf);
return CURLUE_OK;
- nomem:
- free((char *)newp);
+nomem:
+ Curl_dyn_free(&enc);
return CURLUE_OUT_OF_MEMORY;
}
}
- if(what == CURLUPART_HOST) {
- size_t n = strlen(newp);
+ else if(what == CURLUPART_HOST) {
+ size_t n = Curl_dyn_len(&enc);
if(!n && (flags & CURLU_NO_AUTHORITY)) {
/* Skip hostname check, it's allowed to be empty. */
}
else {
- if(hostname_check(u, (char *)newp, n)) {
- free((char *)newp);
+ if(!n || hostname_check(u, (char *)newp, n)) {
+ Curl_dyn_free(&enc);
return CURLUE_BAD_HOSTNAME;
}
}