Library import 8 (#1074)

* Library import 8 * Add contrib/libs/cxxsupp/libcxx/include/__verbose_abort
author: AlexSm <alex@ydb.tech> 2024-01-18 11:28:56 +0100
committer: GitHub <noreply@github.com> 2024-01-18 11:28:56 +0100
commit: 9d0a3761b3201e0d9db879a7adf91876ebdb0564 (patch)
tree: 541d11ac878c18efd7ebca81e35112aa0fef995b /contrib/libs/curl/lib/urlapi.c
parent: 404ef8886ecc9736bc58ade6da2fbd83b486a408 (diff)
download: ydb-9d0a3761b3201e0d9db879a7adf91876ebdb0564.tar.gz
1 files changed, 527 insertions, 460 deletions
diff --git a/contrib/libs/curl/lib/urlapi.c b/contrib/libs/curl/lib/urlapi.c
index 7dac81c85c..0d11e48c92 100644
--- a/contrib/libs/curl/lib/urlapi.c
+++ b/contrib/libs/curl/lib/urlapi.c
@@ -5,7 +5,7 @@
  *                            | (__| |_| |  _ <| |___
  *                             \___|\___/|_| \_\_____|
  *
- * Copyright (C) 1998 - 2022, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
  *
  * This software is licensed as described in the file COPYING, which
  * you should have received as part of this distribution. The terms
@@ -33,6 +33,8 @@
 #include "inet_pton.h"
 #include "inet_ntop.h"
 #include "strdup.h"
+#include "idn.h"
+#include "curl_memrchr.h"
 
 /* The last 3 #include files should be in this order */
 #include "curl_printf.h"
@@ -56,6 +58,15 @@
 /* scheme is not URL encoded, the longest libcurl supported ones are... */
 #define MAX_SCHEME_LEN 40
 
+/*
+ * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make
+ * sure we have _some_ value for AF_INET6 without polluting our fake value
+ * everywhere.
+ */
+#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
+#define AF_INET6 (AF_INET + 1)
+#endif
+
 /* Internal representation of CURLU. Point to URL-encoded strings. */
 struct Curl_URL {
   char *scheme;
@@ -89,7 +100,7 @@ static void free_urlhandle(struct Curl_URL *u)
 
 /*
  * Find the separator at the end of the host name, or the '?' in cases like
- * http://www.url.com?id=2380
+ * http://www.example.com?id=2380
  */
 static const char *find_host_sep(const char *url)
 {
@@ -116,14 +127,11 @@ static const char *find_host_sep(const char *url)
 }
 
 /*
- * Decide in an encoding-independent manner whether a character in a URL must
- * be escaped. This is used in urlencode_str().
+ * Decide whether a character in a URL must be escaped.
  */
-static bool urlchar_needs_escaping(int c)
-{
-  return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
-}
+#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))
 
+static const char hexdigits[] = "0123456789abcdef";
 /* urlencode_str() writes data into an output dynbuf and URL-encodes the
  * spaces in the source URL accordingly.
  *
@@ -167,7 +175,10 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
       left = FALSE;
 
     if(urlchar_needs_escaping(*iptr)) {
-      if(Curl_dyn_addf(o, "%%%02x", *iptr))
+      char out[3]={'%'};
+      out[1] = hexdigits[*iptr>>4];
+      out[2] = hexdigits[*iptr & 0xf];
+      if(Curl_dyn_addn(o, out, 3))
         return CURLUE_OUT_OF_MEMORY;
     }
     else {
@@ -190,26 +201,27 @@ static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
 size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
                             bool guess_scheme)
 {
-  int i;
+  int i = 0;
   DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
   (void)buflen; /* only used in debug-builds */
   if(buf)
     buf[0] = 0; /* always leave a defined value in buf */
-#ifdef WIN32
+#ifdef _WIN32
   if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
     return 0;
 #endif
-  for(i = 0; i < MAX_SCHEME_LEN; ++i) {
-    char s = url[i];
-    if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
-      /* RFC 3986 3.1 explains:
-        scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-      */
-    }
-    else {
-      break;
+  if(ISALPHA(url[0]))
+    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
+      char s = url[i];
+      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
+        /* RFC 3986 3.1 explains:
+           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+        */
+      }
+      else {
+        break;
+      }
     }
-  }
   if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
     /* If this does not guess scheme, the scheme always ends with the colon so
        that this also detects data: URLs etc. In guessing mode, data: could
@@ -326,7 +338,7 @@ static char *concat_url(char *base, const char *relurl)
       pathsep = strchr(protsep, '/');
       if(pathsep) {
         /* When people use badly formatted URLs, such as
-           "http://www.url.com?dir=/home/daniel" we must not use the first
+           "http://www.example.com?dir=/home/daniel" we must not use the first
            slash, if there's a ?-letter before it! */
         char *sep = strchr(protsep, '?');
         if(sep && (sep < pathsep))
@@ -335,9 +347,9 @@ static char *concat_url(char *base, const char *relurl)
       }
       else {
         /* There was no slash. Now, since we might be operating on a badly
-           formatted URL, such as "http://www.url.com?id=2380" which doesn't
-           use a slash separator as it is supposed to, we need to check for a
-           ?-letter as well! */
+           formatted URL, such as "http://www.example.com?id=2380" which
+           doesn't use a slash separator as it is supposed to, we need to check
+           for a ?-letter as well! */
         pathsep = strchr(protsep, '?');
         if(pathsep)
           *pathsep = 0;
@@ -365,27 +377,30 @@ static char *concat_url(char *base, const char *relurl)
   return Curl_dyn_ptr(&newest);
 }
 
-/* scan for byte values < 31 or 127 */
-static bool junkscan(const char *part, unsigned int flags)
+/* scan for byte values <= 31, 127 and sometimes space */
+static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
 {
-  if(part) {
-    static const char badbytes[]={
-      /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-      0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
-      0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
-      0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
-      0x7f, 0x00 /* null-terminate */
-    };
-    size_t n = strlen(part);
-    size_t nfine = strcspn(part, badbytes);
-    if(nfine != n)
-      /* since we don't know which part is scanned, return a generic error
-         code */
-      return TRUE;
-    if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
-      return TRUE;
-  }
-  return FALSE;
+  static const char badbytes[]={
+    /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x7f, 0x00 /* null-terminate */
+  };
+  size_t n = strlen(url);
+  size_t nfine;
+
+  if(n > CURL_MAX_INPUT_LENGTH)
+    /* excessive input length */
+    return CURLUE_MALFORMED_INPUT;
+
+  nfine = strcspn(url, badbytes);
+  if((nfine != n) ||
+     (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
+    return CURLUE_MALFORMED_INPUT;
+
+  *urllen = n;
+  return CURLUE_OK;
 }
 
 /*
@@ -396,8 +411,10 @@ static bool junkscan(const char *part, unsigned int flags)
  *
  */
 static CURLUcode parse_hostname_login(struct Curl_URL *u,
-                                      struct dynbuf *host,
-                                      unsigned int flags)
+                                      const char *login,
+                                      size_t len,
+                                      unsigned int flags,
+                                      size_t *offset) /* to the host name */
 {
   CURLUcode result = CURLUE_OK;
   CURLcode ccode;
@@ -413,13 +430,12 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
    *
    * We need somewhere to put the embedded details, so do that first.
    */
-
-  char *login = Curl_dyn_ptr(host);
   char *ptr;
 
   DEBUGASSERT(login);
 
-  ptr = strchr(login, '@');
+  *offset = 0;
+  ptr = memchr(login, '@', len);
   if(!ptr)
     goto out;
 
@@ -430,7 +446,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
 
   /* if this is a known scheme, get some details */
   if(u->scheme)
-    h = Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+    h = Curl_get_scheme_handler(u->scheme);
 
   /* We could use the login information in the URL so extract it. Only parse
      options if the handler says we should. Note that 'h' might be NULL! */
@@ -449,35 +465,25 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
       result = CURLUE_USER_NOT_ALLOWED;
       goto out;
     }
-    if(junkscan(userp, flags)) {
-      result = CURLUE_BAD_USER;
-      goto out;
-    }
+    free(u->user);
     u->user = userp;
   }
 
   if(passwdp) {
-    if(junkscan(passwdp, flags)) {
-      result = CURLUE_BAD_PASSWORD;
-      goto out;
-    }
+    free(u->password);
     u->password = passwdp;
   }
 
   if(optionsp) {
-    if(junkscan(optionsp, flags)) {
-      result = CURLUE_BAD_LOGIN;
-      goto out;
-    }
+    free(u->options);
     u->options = optionsp;
   }
 
-  /* move the name to the start of the host buffer */
-  if(Curl_dyn_tail(host, strlen(ptr)))
-    return CURLUE_OUT_OF_MEMORY;
-
+  /* the host name starts at this offset */
+  *offset = ptr - login;
   return CURLUE_OK;
-  out:
+
+out:
 
   free(userp);
   free(passwdp);
@@ -492,35 +498,20 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
 UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
                                    bool has_scheme)
 {
-  char *portptr = NULL;
-  char endbracket;
-  int len;
+  char *portptr;
   char *hostname = Curl_dyn_ptr(host);
   /*
-   * Find the end of an IPv6 address, either on the ']' ending bracket or
-   * a percent-encoded zone index.
+   * Find the end of an IPv6 address on the ']' ending bracket.
    */
-  if(1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n",
-                 &endbracket, &len)) {
-    if(']' == endbracket)
-      portptr = &hostname[len];
-    else if('%' == endbracket) {
-      int zonelen = len;
-      if(1 == sscanf(hostname + zonelen, "%*[^]]%c%n", &endbracket, &len)) {
-        if(']' != endbracket)
-          return CURLUE_BAD_IPV6;
-        portptr = &hostname[--zonelen + len + 1];
-      }
-      else
-        return CURLUE_BAD_IPV6;
-    }
-    else
+  if(hostname[0] == '[') {
+    portptr = strchr(hostname, ']');
+    if(!portptr)
       return CURLUE_BAD_IPV6;
-
+    portptr++;
     /* this is a RFC2732-style specified IP-address */
-    if(portptr && *portptr) {
+    if(*portptr) {
       if(*portptr != ':')
-        return CURLUE_BAD_IPV6;
+        return CURLUE_BAD_PORT_NUMBER;
     }
     else
       portptr = NULL;
@@ -531,7 +522,6 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
   if(portptr) {
     char *rest;
     long port;
-    char portbuf[7];
     size_t keep = portptr - hostname;
 
     /* Browser behavior adaptation. If there's a colon with no digits after,
@@ -557,11 +547,10 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
     if(rest[0])
       return CURLUE_BAD_PORT_NUMBER;
 
-    *rest = 0;
-    /* generate a new port number string to get rid of leading zeroes etc */
-    msnprintf(portbuf, sizeof(portbuf), "%ld", port);
     u->portnum = port;
-    u->port = strdup(portbuf);
+    /* generate a new port number string to get rid of leading zeroes etc */
+    free(u->port);
+    u->port = aprintf("%ld", port);
     if(!u->port)
       return CURLUE_OUT_OF_MEMORY;
   }
@@ -569,74 +558,79 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
   return CURLUE_OK;
 }
 
-static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
-                                size_t hlen) /* length of hostname */
+/* this assumes 'hostname' now starts with [ */
+static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
+                            size_t hlen) /* length of hostname */
 {
   size_t len;
-  DEBUGASSERT(hostname);
-
-  if(!hostname[0])
-    return CURLUE_NO_HOST;
-  else if(hostname[0] == '[') {
-    const char *l = "0123456789abcdefABCDEF:.";
-    if(hlen < 4) /* '[::]' is the shortest possible valid string */
+  DEBUGASSERT(*hostname == '[');
+  if(hlen < 4) /* '[::]' is the shortest possible valid string */
+    return CURLUE_BAD_IPV6;
+  hostname++;
+  hlen -= 2;
+
+  /* only valid IPv6 letters are ok */
+  len = strspn(hostname, "0123456789abcdefABCDEF:.");
+
+  if(hlen != len) {
+    hlen = len;
+    if(hostname[len] == '%') {
+      /* this could now be '%[zone id]' */
+      char zoneid[16];
+      int i = 0;
+      char *h = &hostname[len + 1];
+      /* pass '25' if present and is a url encoded percent sign */
+      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
+        h += 2;
+      while(*h && (*h != ']') && (i < 15))
+        zoneid[i++] = *h++;
+      if(!i || (']' != *h))
+        return CURLUE_BAD_IPV6;
+      zoneid[i] = 0;
+      u->zoneid = strdup(zoneid);
+      if(!u->zoneid)
+        return CURLUE_OUT_OF_MEMORY;
+      hostname[len] = ']'; /* insert end bracket */
+      hostname[len + 1] = 0; /* terminate the hostname */
+    }
+    else
       return CURLUE_BAD_IPV6;
-    hostname++;
-    hlen -= 2;
+    /* hostname is fine */
+  }
 
-    if(hostname[hlen] != ']')
+  /* Check the IPv6 address. */
+  {
+    char dest[16]; /* fits a binary IPv6 address */
+    char norm[MAX_IPADR_LEN];
+    hostname[hlen] = 0; /* end the address there */
+    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
       return CURLUE_BAD_IPV6;
 
-    /* only valid letters are ok */
-    len = strspn(hostname, l);
-    if(hlen != len) {
-      hlen = len;
-      if(hostname[len] == '%') {
-        /* this could now be '%[zone id]' */
-        char zoneid[16];
-        int i = 0;
-        char *h = &hostname[len + 1];
-        /* pass '25' if present and is a url encoded percent sign */
-        if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
-          h += 2;
-        while(*h && (*h != ']') && (i < 15))
-          zoneid[i++] = *h++;
-        if(!i || (']' != *h))
-          /* impossible to reach? */
-          return CURLUE_MALFORMED_INPUT;
-        zoneid[i] = 0;
-        u->zoneid = strdup(zoneid);
-        if(!u->zoneid)
-          return CURLUE_OUT_OF_MEMORY;
-        hostname[len] = ']'; /* insert end bracket */
-        hostname[len + 1] = 0; /* terminate the hostname */
-      }
-      else
-        return CURLUE_BAD_IPV6;
-      /* hostname is fine */
-    }
-#ifdef ENABLE_IPV6
-    {
-      char dest[16]; /* fits a binary IPv6 address */
-      char norm[MAX_IPADR_LEN];
-      hostname[hlen] = 0; /* end the address there */
-      if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
-        return CURLUE_BAD_IPV6;
-
-      /* check if it can be done shorter */
-      if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
-         (strlen(norm) < hlen)) {
-        strcpy(hostname, norm);
-        hlen = strlen(norm);
-        hostname[hlen + 1] = 0;
-      }
-      hostname[hlen] = ']'; /* restore ending bracket */
+    /* check if it can be done shorter */
+    if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
+       (strlen(norm) < hlen)) {
+      strcpy(hostname, norm);
+      hlen = strlen(norm);
+      hostname[hlen + 1] = 0;
     }
-#endif
+    hostname[hlen] = ']'; /* restore ending bracket */
   }
+  return CURLUE_OK;
+}
+
+static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
+                                size_t hlen) /* length of hostname */
+{
+  size_t len;
+  DEBUGASSERT(hostname);
+
+  if(!hlen)
+    return CURLUE_NO_HOST;
+  else if(hostname[0] == '[')
+    return ipv6_parse(u, hostname, hlen);
   else {
     /* letters from the second string are not ok */
-    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,");
+    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
     if(hlen != len)
       /* hostname with bad content */
       return CURLUE_BAD_HOSTNAME;
@@ -644,50 +638,52 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
   return CURLUE_OK;
 }
 
-#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
-
 /*
  * Handle partial IPv4 numerical addresses and different bases, like
  * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
  *
- * If the given input string is syntactically wrong or any part for example is
- * too big, this function returns FALSE and doesn't create any output.
+ * If the given input string is syntactically wrong IPv4 or any part for
+ * example is too big, this function returns HOST_NAME.
  *
  * Output the "normalized" version of that input string in plain quad decimal
- * integers and return TRUE.
+ * integers.
+ *
+ * Returns the host type.
  */
-static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
+
+#define HOST_ERROR   -1 /* out of memory */
+#define HOST_BAD     -2 /* bad IPv4 address */
+
+#define HOST_NAME    1
+#define HOST_IPV4    2
+#define HOST_IPV6    3
+
+static int ipv4_normalize(struct dynbuf *host)
 {
   bool done = FALSE;
   int n = 0;
-  const char *c = hostname;
+  const char *c = Curl_dyn_ptr(host);
   unsigned long parts[4] = {0, 0, 0, 0};
+  CURLcode result = CURLE_OK;
+
+  if(*c == '[')
+    return HOST_IPV6;
 
   while(!done) {
     char *endp;
     unsigned long l;
-    if((*c < '0') || (*c > '9'))
+    if(!ISDIGIT(*c))
       /* most importantly this doesn't allow a leading plus or minus */
-      return FALSE;
+      return HOST_NAME;
     l = strtoul(c, &endp, 0);
 
-    /* overflow or nothing parsed at all */
-    if(((l == ULONG_MAX) && (errno == ERANGE)) ||  (endp == c))
-      return FALSE;
-
-#if SIZEOF_LONG > 4
-    /* a value larger than 32 bits */
-    if(l > UINT_MAX)
-      return FALSE;
-#endif
-
     parts[n] = l;
     c = endp;
 
-    switch (*c) {
-    case '.' :
+    switch(*c) {
+    case '.':
       if(n == 3)
-        return FALSE;
+        return HOST_NAME;
       n++;
       c++;
       break;
@@ -697,51 +693,63 @@ static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
       break;
 
     default:
-      return FALSE;
+      return HOST_NAME;
     }
-  }
 
-  /* this is deemed a valid IPv4 numerical address */
+    /* overflow */
+    if((l == ULONG_MAX) && (errno == ERANGE))
+      return HOST_NAME;
+
+#if SIZEOF_LONG > 4
+    /* a value larger than 32 bits */
+    if(l > UINT_MAX)
+      return HOST_NAME;
+#endif
+  }
 
   switch(n) {
   case 0: /* a -- 32 bits */
-    msnprintf(outp, olen, "%u.%u.%u.%u",
-              parts[0] >> 24, (parts[0] >> 16) & 0xff,
-              (parts[0] >> 8) & 0xff, parts[0] & 0xff);
+    Curl_dyn_reset(host);
+
+    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+                           parts[0] >> 24, (parts[0] >> 16) & 0xff,
+                           (parts[0] >> 8) & 0xff, parts[0] & 0xff);
     break;
   case 1: /* a.b -- 8.24 bits */
     if((parts[0] > 0xff) || (parts[1] > 0xffffff))
-      return FALSE;
-    msnprintf(outp, olen, "%u.%u.%u.%u",
-              parts[0], (parts[1] >> 16) & 0xff,
-              (parts[1] >> 8) & 0xff, parts[1] & 0xff);
+      return HOST_NAME;
+    Curl_dyn_reset(host);
+    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+                           parts[0], (parts[1] >> 16) & 0xff,
+                           (parts[1] >> 8) & 0xff, parts[1] & 0xff);
     break;
   case 2: /* a.b.c -- 8.8.16 bits */
     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
-      return FALSE;
-    msnprintf(outp, olen, "%u.%u.%u.%u",
-              parts[0], parts[1], (parts[2] >> 8) & 0xff,
-              parts[2] & 0xff);
+      return HOST_NAME;
+    Curl_dyn_reset(host);
+    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+                           parts[0], parts[1], (parts[2] >> 8) & 0xff,
+                           parts[2] & 0xff);
     break;
   case 3: /* a.b.c.d -- 8.8.8.8 bits */
     if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
        (parts[3] > 0xff))
-      return FALSE;
-    msnprintf(outp, olen, "%u.%u.%u.%u",
-              parts[0], parts[1], parts[2], parts[3]);
+      return HOST_NAME;
+    Curl_dyn_reset(host);
+    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
+                           parts[0], parts[1], parts[2], parts[3]);
     break;
   }
-  return TRUE;
+  if(result)
+    return HOST_ERROR;
+  return HOST_IPV4;
 }
 
 /* if necessary, replace the host content with a URL decoded version */
-static CURLUcode decode_host(struct dynbuf *host)
+static CURLUcode urldecode_host(struct dynbuf *host)
 {
   char *per = NULL;
   const char *hostname = Curl_dyn_ptr(host);
-  if(hostname[0] == '[')
-    /* only decode if not an ipv6 numerical */
-    return CURLUE_OK;
   per = strchr(hostname, '%');
   if(!per)
     /* nothing to decode */
@@ -764,6 +772,78 @@ static CURLUcode decode_host(struct dynbuf *host)
   return CURLUE_OK;
 }
 
+static CURLUcode parse_authority(struct Curl_URL *u,
+                                 const char *auth, size_t authlen,
+                                 unsigned int flags,
+                                 struct dynbuf *host,
+                                 bool has_scheme)
+{
+  size_t offset;
+  CURLUcode result;
+
+  /*
+   * Parse the login details and strip them out of the host name.
+   */
+  result = parse_hostname_login(u, auth, authlen, flags, &offset);
+  if(result)
+    goto out;
+
+  if(Curl_dyn_addn(host, auth + offset, authlen - offset)) {
+    result = CURLUE_OUT_OF_MEMORY;
+    goto out;
+  }
+
+  result = Curl_parse_port(u, host, has_scheme);
+  if(result)
+    goto out;
+
+  if(!Curl_dyn_len(host))
+    return CURLUE_NO_HOST;
+
+  switch(ipv4_normalize(host)) {
+  case HOST_IPV4:
+    break;
+  case HOST_IPV6:
+    result = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
+    break;
+  case HOST_NAME:
+    result = urldecode_host(host);
+    if(!result)
+      result = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
+    break;
+  case HOST_ERROR:
+    result = CURLUE_OUT_OF_MEMORY;
+    break;
+  case HOST_BAD:
+  default:
+    result = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
+    break;
+  }
+
+out:
+  return result;
+}
+
+CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
+                                 unsigned int flags)
+{
+  CURLUcode result;
+  struct dynbuf host;
+
+  DEBUGASSERT(authority);
+  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
+
+  result = parse_authority(u, authority, strlen(authority), flags,
+                           &host, !!u->scheme);
+  if(result)
+    Curl_dyn_free(&host);
+  else {
+    free(u->host);
+    u->host = Curl_dyn_ptr(&host);
+  }
+  return result;
+}
+
 /*
  * "Remove Dot Segments"
  * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
@@ -782,32 +862,27 @@ static CURLUcode decode_host(struct dynbuf *host)
  *
  * RETURNS
  *
- * an allocated dedotdotified output string
+ * Zero for success and 'out' set to an allocated dedotdotified string.
  */
-UNITTEST char *dedotdotify(const char *input, size_t clen);
-UNITTEST char *dedotdotify(const char *input, size_t clen)
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
+UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
 {
-  char *out = malloc(clen + 1);
   char *outptr;
-  const char *orginput = input;
-  char *queryp;
+  const char *endp = &input[clen];
+  char *out;
+
+  *outp = NULL;
+  /* the path always starts with a slash, and a slash has not dot */
+  if((clen < 2) || !memchr(input, '.', clen))
+    return 0;
+
+  out = malloc(clen + 1);
   if(!out)
-    return NULL; /* out of memory */
+    return 1; /* out of memory */
 
   *out = 0; /* null-terminates, for inputs like "./" */
   outptr = out;
 
-  if(!*input)
-    /* zero length input string, return that */
-    return out;
-
-  /*
-   * To handle query-parts properly, we must find it and remove it during the
-   * dotdot-operation and then append it again at the end to the output
-   * string.
-   */
-  queryp = strchr(input, '?');
-
   do {
     bool dotdot = TRUE;
     if(*input == '.') {
@@ -893,30 +968,20 @@ UNITTEST char *dedotdotify(const char *input, size_t clen)
       *outptr = 0;
     }
 
-    /* continue until end of input string OR, if there is a terminating
-       query part, stop there */
-  } while(*input && (!queryp || (input < queryp)));
-
-  if(queryp) {
-    size_t qlen;
-    /* There was a query part, append that to the output. */
-    size_t oindex = queryp - orginput;
-    qlen = strlen(&orginput[oindex]);
-    memcpy(outptr, &orginput[oindex], qlen + 1); /* include zero byte */
-  }
+    /* continue until end of path */
+  } while(input < endp);
 
-  return out;
+  *outp = out;
+  return 0; /* success */
 }
 
 static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
 {
   const char *path;
   size_t pathlen;
-  bool uncpath = FALSE;
   char *query = NULL;
   char *fragment = NULL;
   char schemebuf[MAX_SCHEME_LEN + 1];
-  const char *schemep = NULL;
   size_t schemelen = 0;
   size_t urllen;
   CURLUcode result = CURLUE_OK;
@@ -927,16 +992,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
 
   Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
 
-  /*************************************************************
-   * Parse the URL.
-   ************************************************************/
-  /* allocate scratch area */
-  urllen = strlen(url);
-  if(urllen > CURL_MAX_INPUT_LENGTH) {
-    /* excessive input length */
-    result = CURLUE_MALFORMED_INPUT;
+  result = junkscan(url, &urllen, flags);
+  if(result)
     goto fail;
-  }
 
   schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
                                    flags & (CURLU_GUESS_SCHEME|
@@ -944,6 +1002,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
 
   /* handle the file: scheme */
   if(schemelen && !strcmp(schemebuf, "file")) {
+    bool uncpath = FALSE;
     if(urllen <= 6) {
       /* file:/ is not enough to actually be a complete file: URL */
       result = CURLUE_BAD_FILE_URL;
@@ -952,8 +1011,9 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
 
     /* path has been allocated large enough to hold this */
     path = (char *)&url[5];
+    pathlen = urllen - 5;
 
-    schemep = u->scheme = strdup("file");
+    u->scheme = strdup("file");
     if(!u->scheme) {
       result = CURLUE_OUT_OF_MEMORY;
       goto fail;
@@ -996,7 +1056,7 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
           ptr += 9; /* now points to the slash after the host */
         }
         else {
-#if defined(WIN32)
+#if defined(_WIN32)
           size_t len;
 
           /* the host name, NetBIOS computer name, can not contain disallowed
@@ -1028,13 +1088,14 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
       }
 
       path = ptr;
+      pathlen = urllen - (ptr - url);
     }
 
     if(!uncpath)
       /* no host for file: URLs by default */
       Curl_dyn_reset(&host);
 
-#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__)
+#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
     /* Don't allow Windows drive letters when not in Windows.
      * This catches both "file:/c:" and "file:c:" */
     if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
@@ -1048,40 +1109,38 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
     if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
       /* This cannot be done with strcpy, as the memory chunks overlap! */
       path++;
+      pathlen--;
     }
 #endif
 
   }
   else {
     /* clear path */
-    const char *p;
+    const char *schemep = NULL;
     const char *hostp;
-    size_t len;
+    size_t hostlen;
 
     if(schemelen) {
       int i = 0;
-      p = &url[schemelen + 1];
-      while(p && (*p == '/') && (i < 4)) {
+      const char *p = &url[schemelen + 1];
+      while((*p == '/') && (i < 4)) {
         p++;
         i++;
       }
 
       schemep = schemebuf;
-      if(!Curl_builtin_scheme(schemep, CURL_ZERO_TERMINATED) &&
+      if(!Curl_get_scheme_handler(schemep) &&
          !(flags & CURLU_NON_SUPPORT_SCHEME)) {
         result = CURLUE_UNSUPPORTED_SCHEME;
         goto fail;
       }
 
-      if((i < 1) || (i>3)) {
+      if((i < 1) || (i > 3)) {
         /* less than one or more than three slashes */
         result = CURLUE_BAD_SLASHES;
         goto fail;
       }
-      if(junkscan(schemep, flags)) {
-        result = CURLUE_BAD_SCHEME;
-        goto fail;
-      }
+      hostp = p; /* host name starts here */
     }
     else {
       /* no scheme! */
@@ -1096,63 +1155,101 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
       /*
        * The URL was badly formatted, let's try without scheme specified.
        */
-      p = url;
+      hostp = url;
     }
-    hostp = p; /* host name starts here */
-
-    /* find the end of the host name + port number */
-    while(*p && !HOSTNAME_END(*p))
-      p++;
 
-    len = p - hostp;
-    if(len) {
-      if(Curl_dyn_addn(&host, hostp, len)) {
+    if(schemep) {
+      u->scheme = strdup(schemep);
+      if(!u->scheme) {
         result = CURLUE_OUT_OF_MEMORY;
         goto fail;
       }
     }
-    else {
-      if(!(flags & CURLU_NO_AUTHORITY)) {
-        result = CURLUE_NO_HOST;
+
+    /* find the end of the host name + port number */
+    hostlen = strcspn(hostp, "/?#");
+    path = &hostp[hostlen];
+
+    /* this pathlen also contains the query and the fragment */
+    pathlen = urllen - (path - url);
+    if(hostlen) {
+
+      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
+      if(result)
         goto fail;
-      }
-    }
 
-    path = (char *)p;
+      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
+        const char *hostname = Curl_dyn_ptr(&host);
+        /* legacy curl-style guess based on host name */
+        if(checkprefix("ftp.", hostname))
+          schemep = "ftp";
+        else if(checkprefix("dict.", hostname))
+          schemep = "dict";
+        else if(checkprefix("ldap.", hostname))
+          schemep = "ldap";
+        else if(checkprefix("imap.", hostname))
+          schemep = "imap";
+        else if(checkprefix("smtp.", hostname))
+          schemep = "smtp";
+        else if(checkprefix("pop3.", hostname))
+          schemep = "pop3";
+        else
+          schemep = "http";
 
-    if(schemep) {
-      u->scheme = strdup(schemep);
-      if(!u->scheme) {
+        u->scheme = strdup(schemep);
+        if(!u->scheme) {
+          result = CURLUE_OUT_OF_MEMORY;
+          goto fail;
+        }
+      }
+    }
+    else if(flags & CURLU_NO_AUTHORITY) {
+      /* allowed to be empty. */
+      if(Curl_dyn_add(&host, "")) {
         result = CURLUE_OUT_OF_MEMORY;
         goto fail;
       }
     }
+    else {
+      result = CURLUE_NO_HOST;
+      goto fail;
+    }
   }
 
   fragment = strchr(path, '#');
   if(fragment) {
-    fraglen = strlen(fragment);
+    fraglen = pathlen - (fragment - path);
     if(fraglen > 1) {
       /* skip the leading '#' in the copy but include the terminating null */
-      u->fragment = Curl_memdup(fragment + 1, fraglen);
-      if(!u->fragment) {
-        result = CURLUE_OUT_OF_MEMORY;
-        goto fail;
+      if(flags & CURLU_URLENCODE) {
+        struct dynbuf enc;
+        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+        if(urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE)) {
+          result = CURLUE_OUT_OF_MEMORY;
+          goto fail;
+        }
+        u->fragment = Curl_dyn_ptr(&enc);
       }
-
-      if(junkscan(u->fragment, flags)) {
-        result = CURLUE_BAD_FRAGMENT;
-        goto fail;
+      else {
+        u->fragment = Curl_strndup(fragment + 1, fraglen - 1);
+        if(!u->fragment) {
+          result = CURLUE_OUT_OF_MEMORY;
+          goto fail;
+        }
       }
     }
+    /* after this, pathlen still contains the query */
+    pathlen -= fraglen;
   }
 
-  query = strchr(path, '?');
-  if(query && (!fragment || (query < fragment))) {
-    size_t qlen = strlen(query) - fraglen; /* includes '?' */
-    pathlen = strlen(path) - qlen - fraglen;
+  DEBUGASSERT(pathlen < urllen);
+  query = memchr(path, '?', pathlen);
+  if(query) {
+    size_t qlen = fragment ? (size_t)(fragment - query) :
+      pathlen - (query - path);
+    pathlen -= qlen;
     if(qlen > 1) {
-      if(qlen && (flags & CURLU_URLENCODE)) {
+      if(flags & CURLU_URLENCODE) {
         struct dynbuf enc;
         Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
         /* skip the leading question mark */
@@ -1163,17 +1260,11 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
         u->query = Curl_dyn_ptr(&enc);
       }
       else {
-        u->query = Curl_memdup(query + 1, qlen);
+        u->query = Curl_strndup(query + 1, qlen - 1);
         if(!u->query) {
           result = CURLUE_OUT_OF_MEMORY;
           goto fail;
         }
-        u->query[qlen - 1] = 0;
-      }
-
-      if(junkscan(u->query, flags)) {
-        result = CURLUE_BAD_QUERY;
-        goto fail;
       }
     }
     else {
@@ -1185,8 +1276,6 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
       }
     }
   }
-  else
-    pathlen = strlen(path) - fraglen;
 
   if(pathlen && (flags & CURLU_URLENCODE)) {
     struct dynbuf enc;
@@ -1199,111 +1288,42 @@ static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
     path = u->path = Curl_dyn_ptr(&enc);
   }
 
-  if(!pathlen) {
-    /* there is no path left, unset */
+  if(pathlen <= 1) {
+    /* there is no path left or just the slash, unset */
     path = NULL;
   }
   else {
     if(!u->path) {
-      u->path = Curl_memdup(path, pathlen + 1);
+      u->path = Curl_strndup(path, pathlen);
       if(!u->path) {
         result = CURLUE_OUT_OF_MEMORY;
         goto fail;
       }
-      u->path[pathlen] = 0;
       path = u->path;
     }
     else if(flags & CURLU_URLENCODE)
       /* it might have encoded more than just the path so cut it */
       u->path[pathlen] = 0;
 
-    if(junkscan(u->path, flags)) {
-      result = CURLUE_BAD_PATH;
-      goto fail;
-    }
-
     if(!(flags & CURLU_PATH_AS_IS)) {
       /* remove ../ and ./ sequences according to RFC3986 */
-      char *newp = dedotdotify((char *)path, pathlen);
-      if(!newp) {
+      char *dedot;
+      int err = dedotdotify((char *)path, pathlen, &dedot);
+      if(err) {
         result = CURLUE_OUT_OF_MEMORY;
         goto fail;
       }
-      free(u->path);
-      u->path = newp;
-    }
-  }
-
-  if(Curl_dyn_len(&host)) {
-    char normalized_ipv4[sizeof("255.255.255.255") + 1];
-
-    /*
-     * Parse the login details and strip them out of the host name.
-     */
-    result = parse_hostname_login(u, &host, flags);
-    if(!result)
-      result = Curl_parse_port(u, &host, schemelen);
-    if(result)
-      goto fail;
-
-    if(junkscan(Curl_dyn_ptr(&host), flags)) {
-      result = CURLUE_BAD_HOSTNAME;
-      goto fail;
-    }
-
-    if(ipv4_normalize(Curl_dyn_ptr(&host),
-                      normalized_ipv4, sizeof(normalized_ipv4))) {
-      Curl_dyn_reset(&host);
-      if(Curl_dyn_add(&host, normalized_ipv4)) {
-        result = CURLUE_OUT_OF_MEMORY;
-        goto fail;
+      if(dedot) {
+        free(u->path);
+        u->path = dedot;
       }
     }
-    else {
-      result = decode_host(&host);
-      if(!result)
-        result = hostname_check(u, Curl_dyn_ptr(&host), Curl_dyn_len(&host));
-      if(result)
-        goto fail;
-    }
-
-    if((flags & CURLU_GUESS_SCHEME) && !schemep) {
-      const char *hostname = Curl_dyn_ptr(&host);
-      /* legacy curl-style guess based on host name */
-      if(checkprefix("ftp.", hostname))
-        schemep = "ftp";
-      else if(checkprefix("dict.", hostname))
-        schemep = "dict";
-      else if(checkprefix("ldap.", hostname))
-        schemep = "ldap";
-      else if(checkprefix("imap.", hostname))
-        schemep = "imap";
-      else if(checkprefix("smtp.", hostname))
-        schemep = "smtp";
-      else if(checkprefix("pop3.", hostname))
-        schemep = "pop3";
-      else
-        schemep = "http";
-
-      u->scheme = strdup(schemep);
-      if(!u->scheme) {
-        result = CURLUE_OUT_OF_MEMORY;
-        goto fail;
-      }
-    }
-  }
-  else if(flags & CURLU_NO_AUTHORITY) {
-    /* allowed to be empty. */
-    if(Curl_dyn_add(&host, "")) {
-      result = CURLUE_OUT_OF_MEMORY;
-      goto fail;
-    }
   }
 
   u->host = Curl_dyn_ptr(&host);
 
   return result;
-  fail:
+fail:
   Curl_dyn_free(&host);
   free_urlhandle(u);
   return result;
@@ -1330,7 +1350,7 @@ static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
  */
 CURLU *curl_url(void)
 {
-  return calloc(sizeof(struct Curl_URL), 1);
+  return calloc(1, sizeof(struct Curl_URL));
 }
 
 void curl_url_cleanup(CURLU *u)
@@ -1350,9 +1370,9 @@ void curl_url_cleanup(CURLU *u)
     }                                           \
   } while(0)
 
-CURLU *curl_url_dup(CURLU *in)
+CURLU *curl_url_dup(const CURLU *in)
 {
-  struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1);
+  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
   if(u) {
     DUP(u, in, scheme);
     DUP(u, in, user);
@@ -1363,22 +1383,25 @@ CURLU *curl_url_dup(CURLU *in)
     DUP(u, in, path);
     DUP(u, in, query);
     DUP(u, in, fragment);
+    DUP(u, in, zoneid);
     u->portnum = in->portnum;
   }
   return u;
-  fail:
+fail:
   curl_url_cleanup(u);
   return NULL;
 }
 
-CURLUcode curl_url_get(CURLU *u, CURLUPart what,
+CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
                        char **part, unsigned int flags)
 {
-  char *ptr;
+  const char *ptr;
   CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
   char portbuf[7];
   bool urldecode = (flags & CURLU_URLDECODE)?1:0;
   bool urlencode = (flags & CURLU_URLENCODE)?1:0;
+  bool punycode = FALSE;
+  bool depunyfy = FALSE;
   bool plusdecode = FALSE;
   (void)flags;
   if(!u)
@@ -1408,6 +1431,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
   case CURLUPART_HOST:
     ptr = u->host;
     ifmissing = CURLUE_NO_HOST;
+    punycode = (flags & CURLU_PUNYCODE)?1:0;
+    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
     break;
   case CURLUPART_ZONEID:
     ptr = u->zoneid;
@@ -1420,8 +1445,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
     if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
       /* there's no stored port number, but asked to deliver
          a default one for the scheme */
-      const struct Curl_handler *h =
-        Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
       if(h) {
         msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
         ptr = portbuf;
@@ -1430,8 +1454,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
     else if(ptr && u->scheme) {
       /* there is a stored port number, but ask to inhibit if
          it matches the default one for the scheme */
-      const struct Curl_handler *h =
-        Curl_builtin_scheme(u->scheme, CURL_ZERO_TERMINATED);
+      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
       if(h && (h->defport == u->portnum) &&
          (flags & CURLU_NO_DEFAULT_PORT))
         ptr = NULL;
@@ -1439,11 +1462,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
     break;
   case CURLUPART_PATH:
     ptr = u->path;
-    if(!ptr) {
-      ptr = u->path = strdup("/");
-      if(!u->path)
-        return CURLUE_OUT_OF_MEMORY;
-    }
+    if(!ptr)
+      ptr = "/";
     break;
   case CURLUPART_QUERY:
     ptr = u->query;
@@ -1460,6 +1480,8 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
     char *options = u->options;
     char *port = u->port;
     char *allochost = NULL;
+    punycode = (flags & CURLU_PUNYCODE)?1:0;
+    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
     if(u->scheme && strcasecompare("file", u->scheme)) {
       url = aprintf("file://%s%s%s",
                     u->path,
@@ -1477,7 +1499,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
       else
         return CURLUE_NO_SCHEME;
 
-      h = Curl_builtin_scheme(scheme, CURL_ZERO_TERMINATED);
+      h = Curl_get_scheme_handler(scheme);
       if(!port && (flags & CURLU_DEFAULT_PORT)) {
         /* there's no stored port number, but asked to deliver
            a default one for the scheme */
@@ -1514,39 +1536,33 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
         if(!allochost)
           return CURLUE_OUT_OF_MEMORY;
       }
-      else {
-        /* only encode '%' in output host name */
-        char *host = u->host;
-        bool percent = FALSE;
-        /* first, count number of percents present in the name */
-        while(*host) {
-          if(*host == '%') {
-            percent = TRUE;
-            break;
-          }
-          host++;
+      else if(punycode) {
+        if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+          return CURLUE_LACKS_IDN;
+#else
+          CURLcode result = Curl_idn_decode(u->host, &allochost);
+          if(result)
+            return (result == CURLE_OUT_OF_MEMORY) ?
+              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+#endif
         }
-        /* if there were percent(s), encode the host name */
-        if(percent) {
-          struct dynbuf enc;
-          CURLcode result;
-          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
-          host = u->host;
-          while(*host) {
-            if(*host == '%')
-              result = Curl_dyn_addn(&enc, "%25", 3);
-            else
-              result = Curl_dyn_addn(&enc, host, 1);
-            if(result)
-              return CURLUE_OUT_OF_MEMORY;
-            host++;
-          }
-          free(u->host);
-          u->host = Curl_dyn_ptr(&enc);
+      }
+      else if(depunyfy) {
+        if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
+#ifndef USE_IDN
+          return CURLUE_LACKS_IDN;
+#else
+          CURLcode result = Curl_idn_encode(u->host, &allochost);
+          if(result)
+            /* this is the most likely error */
+            return (result == CURLE_OUT_OF_MEMORY) ?
+              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+#endif
         }
       }
 
-      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                     scheme,
                     u->user ? u->user : "",
                     u->password ? ":": "",
@@ -1557,7 +1573,6 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
                     allochost ? allochost : u->host,
                     port ? ":": "",
                     port ? port : "",
-                    (u->path && (u->path[0] != '/')) ? "/": "",
                     u->path ? u->path : "/",
                     (u->query && u->query[0]) ? "?": "",
                     (u->query && u->query[0]) ? u->query : "",
@@ -1577,7 +1592,7 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
   if(ptr) {
     size_t partlen = strlen(ptr);
     size_t i = 0;
-    *part = Curl_memdup(ptr, partlen + 1);
+    *part = Curl_strndup(ptr, partlen);
     if(!*part)
       return CURLUE_OUT_OF_MEMORY;
     if(plusdecode) {
@@ -1611,6 +1626,36 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what,
       free(*part);
       *part = Curl_dyn_ptr(&enc);
     }
+    else if(punycode) {
+      if(!Curl_is_ASCII_name(u->host)) {
+#ifndef USE_IDN
+        return CURLUE_LACKS_IDN;
+#else
+        char *allochost;
+        CURLcode result = Curl_idn_decode(*part, &allochost);
+        if(result)
+          return (result == CURLE_OUT_OF_MEMORY) ?
+            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+        free(*part);
+        *part = allochost;
+#endif
+      }
+    }
+    else if(depunyfy) {
+      if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
+#ifndef USE_IDN
+        return CURLUE_LACKS_IDN;
+#else
+        char *allochost;
+        CURLcode result = Curl_idn_encode(*part, &allochost);
+        if(result)
+          return (result == CURLE_OUT_OF_MEMORY) ?
+            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
+        free(*part);
+        *part = allochost;
+#endif
+      }
+    }
 
     return CURLUE_OK;
   }
@@ -1626,8 +1671,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
   bool plusencode = FALSE;
   bool urlskipslash = FALSE;
+  bool leadingslash = FALSE;
   bool appendquery = FALSE;
   bool equalsencode = FALSE;
+  size_t nalloc;
 
   if(!u)
     return CURLUE_BAD_HANDLE;
@@ -1680,18 +1727,36 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
     return CURLUE_OK;
   }
 
+  nalloc = strlen(part);
+  if(nalloc > CURL_MAX_INPUT_LENGTH)
+    /* excessive input length */
+    return CURLUE_MALFORMED_INPUT;
+
   switch(what) {
-  case CURLUPART_SCHEME:
-    if(strlen(part) > MAX_SCHEME_LEN)
-      /* too long */
+  case CURLUPART_SCHEME: {
+    size_t plen = strlen(part);
+    const char *s = part;
+    if((plen > MAX_SCHEME_LEN) || (plen < 1))
+      /* too long or too short */
       return CURLUE_BAD_SCHEME;
-    if(!(flags & CURLU_NON_SUPPORT_SCHEME) &&
-       /* verify that it is a fine scheme */
-       !Curl_builtin_scheme(part, CURL_ZERO_TERMINATED))
+   /* verify that it is a fine scheme */
+    if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
       return CURLUE_UNSUPPORTED_SCHEME;
     storep = &u->scheme;
     urlencode = FALSE; /* never */
+    if(ISALPHA(*s)) {
+      /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
+      while(--plen) {
+        if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
+          s++; /* fine */
+        else
+          return CURLUE_BAD_SCHEME;
+      }
+    }
+    else
+      return CURLUE_BAD_SCHEME;
     break;
+  }
   case CURLUPART_USER:
     storep = &u->user;
     break;
@@ -1701,15 +1766,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   case CURLUPART_OPTIONS:
     storep = &u->options;
     break;
-  case CURLUPART_HOST: {
-    size_t len = strcspn(part, " \r\n");
-    if(strlen(part) != len)
-      /* hostname with bad content */
-      return CURLUE_BAD_HOSTNAME;
+  case CURLUPART_HOST:
     storep = &u->host;
     Curl_safefree(u->zoneid);
     break;
-  }
   case CURLUPART_ZONEID:
     storep = &u->zoneid;
     break;
@@ -1728,6 +1788,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   break;
   case CURLUPART_PATH:
     urlskipslash = TRUE;
+    leadingslash = TRUE; /* enforce */
     storep = &u->path;
     break;
   case CURLUPART_QUERY:
@@ -1750,6 +1811,10 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
     char *oldurl;
     char *redired_url;
 
+    if(!nalloc)
+      /* a blank URL is not a valid URL */
+      return CURLUE_MALFORMED_INPUT;
+
     /* if the new thing is absolute or the old one is not
      * (we could not get an absolute url in 'oldurl'),
      * then replace the existing with the new. */
@@ -1776,18 +1841,17 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
   }
   DEBUGASSERT(storep);
   {
-    const char *newp = part;
-    size_t nalloc = strlen(part);
-
-    if(nalloc > CURL_MAX_INPUT_LENGTH)
-      /* excessive input length */
-      return CURLUE_MALFORMED_INPUT;
+    const char *newp;
+    struct dynbuf enc;
+    Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
 
+    if(leadingslash && (part[0] != '/')) {
+      CURLcode result = Curl_dyn_addn(&enc, "/", 1);
+      if(result)
+        return CURLUE_OUT_OF_MEMORY;
+    }
     if(urlencode) {
       const unsigned char *i;
-      struct dynbuf enc;
-
-      Curl_dyn_init(&enc, nalloc * 3 + 1);
 
       for(i = (const unsigned char *)part; *i; i++) {
         CURLcode result;
@@ -1796,7 +1860,7 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
           if(result)
             return CURLUE_OUT_OF_MEMORY;
         }
-        else if(Curl_isunreserved(*i) ||
+        else if(ISUNRESERVED(*i) ||
                 ((*i == '/') && urlskipslash) ||
                 ((*i == '=') && equalsencode)) {
           if((*i == '=') && equalsencode)
@@ -1807,19 +1871,21 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
             return CURLUE_OUT_OF_MEMORY;
         }
         else {
-          result = Curl_dyn_addf(&enc, "%%%02x", *i);
+          char out[3]={'%'};
+          out[1] = hexdigits[*i>>4];
+          out[2] = hexdigits[*i & 0xf];
+          result = Curl_dyn_addn(&enc, out, 3);
           if(result)
             return CURLUE_OUT_OF_MEMORY;
         }
       }
-      newp = Curl_dyn_ptr(&enc);
     }
     else {
       char *p;
-      newp = strdup(part);
-      if(!newp)
+      CURLcode result = Curl_dyn_add(&enc, part);
+      if(result)
         return CURLUE_OUT_OF_MEMORY;
-      p = (char *)newp;
+      p = Curl_dyn_ptr(&enc);
       while(*p) {
         /* make sure percent encoded are lower case */
         if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
@@ -1832,44 +1898,45 @@ CURLUcode curl_url_set(CURLU *u, CURLUPart what,
           p++;
       }
     }
+    newp = Curl_dyn_ptr(&enc);
 
-    if(appendquery) {
+    if(appendquery && newp) {
       /* Append the 'newp' string onto the old query. Add a '&' separator if
          none is present at the end of the existing query already */
 
       size_t querylen = u->query ? strlen(u->query) : 0;
       bool addamperand = querylen && (u->query[querylen -1] != '&');
       if(querylen) {
-        struct dynbuf enc;
-        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
+        struct dynbuf qbuf;
+        Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
 
-        if(Curl_dyn_addn(&enc, u->query, querylen)) /* add original query */
+        if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
           goto nomem;
 
         if(addamperand) {
-          if(Curl_dyn_addn(&enc, "&", 1))
+          if(Curl_dyn_addn(&qbuf, "&", 1))
             goto nomem;
         }
-        if(Curl_dyn_add(&enc, newp))
+        if(Curl_dyn_add(&qbuf, newp))
           goto nomem;
-        free((char *)newp);
+        Curl_dyn_free(&enc);
         free(*storep);
-        *storep = Curl_dyn_ptr(&enc);
+        *storep = Curl_dyn_ptr(&qbuf);
         return CURLUE_OK;
-        nomem:
-        free((char *)newp);
+nomem:
+        Curl_dyn_free(&enc);
         return CURLUE_OUT_OF_MEMORY;
       }
     }
 
-    if(what == CURLUPART_HOST) {
-      size_t n = strlen(newp);
+    else if(what == CURLUPART_HOST) {
+      size_t n = Curl_dyn_len(&enc);
       if(!n && (flags & CURLU_NO_AUTHORITY)) {
         /* Skip hostname check, it's allowed to be empty. */
       }
       else {
-        if(hostname_check(u, (char *)newp, n)) {
-          free((char *)newp);
+        if(!n || hostname_check(u, (char *)newp, n)) {
+          Curl_dyn_free(&enc);
           return CURLUE_BAD_HOSTNAME;
         }
       }
author	AlexSm <alex@ydb.tech>	2024-01-18 11:28:56 +0100
committer	GitHub <noreply@github.com>	2024-01-18 11:28:56 +0100
commit	9d0a3761b3201e0d9db879a7adf91876ebdb0564 (patch)
tree	541d11ac878c18efd7ebca81e35112aa0fef995b /contrib/libs/curl/lib/urlapi.c
parent	404ef8886ecc9736bc58ade6da2fbd83b486a408 (diff)
download	ydb-9d0a3761b3201e0d9db879a7adf91876ebdb0564.tar.gz