aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/libidn/tld.c
diff options
context:
space:
mode:
authordenplusplus <denplusplus@yandex-team.ru>2022-02-10 16:47:34 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:47:34 +0300
commit57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (patch)
treecc63639f8e502db19a82c20e2861c6d1edbf9fea /contrib/libs/libidn/tld.c
parent464ba3814a83db4f2d5327393b0b6eaf0c86bfd7 (diff)
downloadydb-57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a.tar.gz
Restoring authorship annotation for <denplusplus@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/libidn/tld.c')
-rw-r--r--contrib/libs/libidn/tld.c1028
1 files changed, 514 insertions, 514 deletions
diff --git a/contrib/libs/libidn/tld.c b/contrib/libs/libidn/tld.c
index 4e894663fb..88a26b54ca 100644
--- a/contrib/libs/libidn/tld.c
+++ b/contrib/libs/libidn/tld.c
@@ -1,514 +1,514 @@
-/* tld.c --- Handle TLD restriction checking.
- * Copyright (C) 2004, 2005, 2006, 2007 Simon Josefsson.
- * Copyright (C) 2003, 2004 Free Software Foundation, Inc.
- *
- * Author: Thomas Jacob, Internet24.de
- *
- * This file is part of GNU Libidn.
- *
- * GNU Libidn is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * GNU Libidn is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with GNU Libidn; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
- *
- */
-
-/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
-#include <stringprep.h>
-
-/* Get strcmp(). */
-#include <string.h>
-
-/* Get specifications. */
-#include <tld.h>
-
-/* Array of built-in domain restriction structures. See tlds.c. */
-extern const Tld_table *_tld_tables[];
-
-/**
- * tld_get_table - get table for a TLD name in table
- * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string.
- * @tables: Zero terminated array of #Tld_table info-structures for
- * TLDs.
- *
- * Get the TLD table for a named TLD by searching through the given
- * TLD table array.
- *
- * Return value: Return structure corresponding to TLD @tld by going
- * thru @tables, or return %NULL if no such structure is found.
- */
-const Tld_table *
-tld_get_table (const char *tld, const Tld_table ** tables)
-{
- const Tld_table **tldtable = NULL;
-
- if (!tld || !tables)
- return NULL;
-
- for (tldtable = tables; *tldtable; tldtable++)
- if (!strcmp ((*tldtable)->name, tld))
- return *tldtable;
-
- return NULL;
-}
-
-/**
- * tld_default_table - get table for a TLD name
- * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string.
- * @overrides: Additional zero terminated array of #Tld_table
- * info-structures for TLDs, or %NULL to only use library deault
- * tables.
- *
- * Get the TLD table for a named TLD, using the internal defaults,
- * possibly overrided by the (optional) supplied tables.
- *
- * Return value: Return structure corresponding to TLD @tld_str, first
- * looking through @overrides then thru built-in list, or %NULL if
- * no such structure found.
- */
-const Tld_table *
-tld_default_table (const char *tld, const Tld_table ** overrides)
-{
- const Tld_table *tldtable = NULL;
-
- if (!tld)
- return NULL;
-
- if (overrides)
- tldtable = tld_get_table (tld, overrides);
-
- if (!tldtable)
- tldtable = tld_get_table (tld, _tld_tables);
-
- return tldtable;
-}
-
-#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
- (c) == 0xFF0E || (c) == 0xFF61)
-
-/**
- * tld_get_4 - extract top level domain part in input Unicode string
- * @in: Array of unicode code points to process. Does not need to be
- * zero terminated.
- * @inlen: Number of unicode code points.
- * @out: Zero terminated ascii result string pointer.
- *
- * Isolate the top-level domain of @in and return it as an ASCII
- * string in @out.
- *
- * Return value: Return %TLD_SUCCESS on success, or the corresponding
- * #Tld_rc error code otherwise.
- */
-int
-tld_get_4 (const uint32_t * in, size_t inlen, char **out)
-{
- const uint32_t *ipos;
- size_t olen;
-
- *out = NULL;
- if (!in || inlen == 0)
- return TLD_NODATA;
-
- ipos = &in[inlen - 1];
- olen = 0;
- /* Scan backwards for non(latin)letters. */
- while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
- (*ipos >= 0x61 && *ipos <= 0x7A)))
- ipos--, olen++;
-
- if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */
- {
- char *out_s = malloc (sizeof (char) * (olen + 1));
- char *opos = out_s;
-
- if (!opos)
- return TLD_MALLOC_ERROR;
-
- ipos++;
- /* Transcribe to lowercase ascii string. */
- for (; ipos < &in[inlen]; ipos++, opos++)
- *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
- *opos = 0;
- *out = out_s;
- return TLD_SUCCESS;
- }
-
- return TLD_NO_TLD;
-}
-
-/**
- * tld_get_4z - extract top level domain part in input Unicode string
- * @in: Zero terminated array of unicode code points to process.
- * @out: Zero terminated ascii result string pointer.
- *
- * Isolate the top-level domain of @in and return it as an ASCII
- * string in @out.
- *
- * Return value: Return %TLD_SUCCESS on success, or the corresponding
- * #Tld_rc error code otherwise.
- */
-int
-tld_get_4z (const uint32_t * in, char **out)
-{
- const uint32_t *ipos = in;
-
- if (!in)
- return TLD_NODATA;
-
- while (*ipos)
- ipos++;
-
- return tld_get_4 (in, ipos - in, out);
-}
-
-/**
- * tld_get_z - extract top level domain part in input string
- * @in: Zero terminated character array to process.
- * @out: Zero terminated ascii result string pointer.
- *
- * Isolate the top-level domain of @in and return it as an ASCII
- * string in @out. The input string @in may be UTF-8, ISO-8859-1 or
- * any ASCII compatible character encoding.
- *
- * Return value: Return %TLD_SUCCESS on success, or the corresponding
- * #Tld_rc error code otherwise.
- */
-int
-tld_get_z (const char *in, char **out)
-{
- uint32_t *iucs;
- size_t i, ilen;
- int rc;
-
- ilen = strlen (in);
- iucs = calloc (ilen, sizeof (*iucs));
-
- if (!iucs)
- return TLD_MALLOC_ERROR;
-
- for (i = 0; i < ilen; i++)
- iucs[i] = in[i];
-
- rc = tld_get_4 (iucs, ilen, out);
-
- free (iucs);
-
- return rc;
-}
-
-/*
- * tld_checkchar - verify that character is permitted
- * @ch: 32 bit unicode character to check.
- * @tld: A #Tld_table data structure to check @ch against.
- *
- * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
- * character in @tld.
- *
- * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
- * valid character for the TLD @tld or if @tld is %NULL,
- * %TLD_INVALID if @ch is invalid as defined by @tld.
- */
-static int
-_tld_checkchar (uint32_t ch, const Tld_table * tld)
-{
- const Tld_table_element *s, *e, *m;
-
- if (!tld)
- return TLD_SUCCESS;
-
- /* Check for [-a-z0-9.]. */
- if ((ch >= 0x61 && ch <= 0x7A) ||
- (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
- return TLD_SUCCESS;
-
- s = tld->valid;
- e = s + tld->nvalid;
- while (s < e)
- {
- m = s + ((e - s) >> 1);
- if (ch < m->start)
- e = m;
- else if (ch > m->end)
- s = m + 1;
- else
- return TLD_SUCCESS;
- }
-
- return TLD_INVALID;
-}
-
-/**
- * tld_check_4t - verify that characters are permitted
- * @in: Array of unicode code points to process. Does not need to be
- * zero terminated.
- * @inlen: Number of unicode code points.
- * @errpos: Position of offending character is returned here.
- * @tld: A #Tld_table data structure representing the restrictions for
- * which the input should be tested.
- *
- * Test each of the code points in @in for whether or not
- * they are allowed by the data structure in @tld, return
- * the position of the first character for which this is not
- * the case in @errpos.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
- * points are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos,
- const Tld_table * tld)
-{
- const uint32_t *ipos;
- int rc;
-
- if (!tld) /* No data for TLD so everything is valid. */
- return TLD_SUCCESS;
-
- ipos = in;
- while (ipos < &in[inlen])
- {
- rc = _tld_checkchar (*ipos, tld);
- if (rc != TLD_SUCCESS)
- {
- if (errpos)
- *errpos = ipos - in;
- return rc;
- }
- ipos++;
- }
- return TLD_SUCCESS;
-}
-
-/**
- * tld_check_4tz - verify that characters are permitted
- * @in: Zero terminated array of unicode code points to process.
- * @errpos: Position of offending character is returned here.
- * @tld: A #Tld_table data structure representing the restrictions for
- * which the input should be tested.
- *
- * Test each of the code points in @in for whether or not
- * they are allowed by the data structure in @tld, return
- * the position of the first character for which this is not
- * the case in @errpos.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
- * points are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld)
-{
- const uint32_t *ipos = in;
-
- if (!ipos)
- return TLD_NODATA;
-
- while (*ipos)
- ipos++;
-
- return tld_check_4t (in, ipos - in, errpos, tld);
-}
-
-/**
- * tld_check_4 - verify that characters are permitted
- * @in: Array of unicode code points to process. Does not need to be
- * zero terminated.
- * @inlen: Number of unicode code points.
- * @errpos: Position of offending character is returned here.
- * @overrides: A #Tld_table array of additional domain restriction
- * structures that complement and supersede the built-in information.
- *
- * Test each of the code points in @in for whether or not they are
- * allowed by the information in @overrides or by the built-in TLD
- * restriction data. When data for the same TLD is available both
- * internally and in @overrides, the information in @overrides takes
- * precedence. If several entries for a specific TLD are found, the
- * first one is used. If @overrides is %NULL, only the built-in
- * information is used. The position of the first offending character
- * is returned in @errpos.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
- * points are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos,
- const Tld_table ** overrides)
-{
- const Tld_table *tld;
- char *domain;
- int rc;
-
- if (errpos)
- *errpos = 0;
-
- /* Get TLD name. */
- rc = tld_get_4 (in, inlen, &domain);
-
- if (rc != TLD_SUCCESS)
- {
- if (rc == TLD_NO_TLD) /* No TLD, say OK */
- return TLD_SUCCESS;
- else
- return rc;
- }
-
- /* Retrieve appropriate data structure. */
- tld = tld_default_table (domain, overrides);
- free (domain);
-
- return tld_check_4t (in, inlen, errpos, tld);
-}
-
-/**
- * tld_check_4z - verify that characters are permitted
- * @in: Zero-terminated array of unicode code points to process.
- * @errpos: Position of offending character is returned here.
- * @overrides: A #Tld_table array of additional domain restriction
- * structures that complement and supersede the built-in information.
- *
- * Test each of the code points in @in for whether or not they are
- * allowed by the information in @overrides or by the built-in TLD
- * restriction data. When data for the same TLD is available both
- * internally and in @overrides, the information in @overrides takes
- * precedence. If several entries for a specific TLD are found, the
- * first one is used. If @overrides is %NULL, only the built-in
- * information is used. The position of the first offending character
- * is returned in @errpos.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
- * points are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_4z (const uint32_t * in, size_t * errpos,
- const Tld_table ** overrides)
-{
- const uint32_t *ipos = in;
-
- if (!ipos)
- return TLD_NODATA;
-
- while (*ipos)
- ipos++;
-
- return tld_check_4 (in, ipos - in, errpos, overrides);
-}
-
-/**
- * tld_check_8z - verify that characters are permitted
- * @in: Zero-terminated UTF8 string to process.
- * @errpos: Position of offending character is returned here.
- * @overrides: A #Tld_table array of additional domain restriction
- * structures that complement and supersede the built-in information.
- *
- * Test each of the characters in @in for whether or not they are
- * allowed by the information in @overrides or by the built-in TLD
- * restriction data. When data for the same TLD is available both
- * internally and in @overrides, the information in @overrides takes
- * precedence. If several entries for a specific TLD are found, the
- * first one is used. If @overrides is %NULL, only the built-in
- * information is used. The position of the first offending character
- * is returned in @errpos. Note that the error position refers to the
- * decoded character offset rather than the byte position in the
- * string.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all
- * characters are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides)
-{
- uint32_t *iucs;
- size_t ilen;
- int rc;
-
- if (!in)
- return TLD_NODATA;
-
- iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
-
- if (!iucs)
- return TLD_MALLOC_ERROR;
-
- rc = tld_check_4 (iucs, ilen, errpos, overrides);
-
- free (iucs);
-
- return rc;
-}
-
-/**
- * tld_check_lz - verify that characters are permitted
- * @in: Zero-terminated string in the current locales encoding to process.
- * @errpos: Position of offending character is returned here.
- * @overrides: A #Tld_table array of additional domain restriction
- * structures that complement and supersede the built-in information.
- *
- * Test each of the characters in @in for whether or not they are
- * allowed by the information in @overrides or by the built-in TLD
- * restriction data. When data for the same TLD is available both
- * internally and in @overrides, the information in @overrides takes
- * precedence. If several entries for a specific TLD are found, the
- * first one is used. If @overrides is %NULL, only the built-in
- * information is used. The position of the first offending character
- * is returned in @errpos. Note that the error position refers to the
- * decoded character offset rather than the byte position in the
- * string.
- *
- * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all
- * characters are valid or when @tld is null, %TLD_INVALID if a
- * character is not allowed, or additional error codes on general
- * failure conditions.
- */
-int
-tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides)
-{
- char *utf8;
- int rc;
-
- if (!in)
- return TLD_NODATA;
-
- utf8 = stringprep_locale_to_utf8 (in);
- if (!utf8)
- return TLD_ICONV_ERROR;
-
-
- rc = tld_check_8z (utf8, errpos, overrides);
-
- free (utf8);
-
- return rc;
-}
-
-/**
- * Tld_rc:
- * @TLD_SUCCESS: Successful operation. This value is guaranteed to
- * always be zero, the remaining ones are only guaranteed to hold
- * non-zero values, for logical comparison purposes.
- * @TLD_INVALID: Invalid character found.
- * @TLD_NODATA: No input data was provided.
- * @TLD_MALLOC_ERROR: Error during memory allocation.
- * @TLD_ICONV_ERROR: Error during iconv string conversion.
- * @TLD_NO_TLD: No top-level domain found in domain string.
- *
- * Enumerated return codes of the TLD checking functions.
- * The value 0 is guaranteed to always correspond to success.
- */
+/* tld.c --- Handle TLD restriction checking.
+ * Copyright (C) 2004, 2005, 2006, 2007 Simon Josefsson.
+ * Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ *
+ * Author: Thomas Jacob, Internet24.de
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ */
+
+/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */
+#include <stringprep.h>
+
+/* Get strcmp(). */
+#include <string.h>
+
+/* Get specifications. */
+#include <tld.h>
+
+/* Array of built-in domain restriction structures. See tlds.c. */
+extern const Tld_table *_tld_tables[];
+
+/**
+ * tld_get_table - get table for a TLD name in table
+ * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string.
+ * @tables: Zero terminated array of #Tld_table info-structures for
+ * TLDs.
+ *
+ * Get the TLD table for a named TLD by searching through the given
+ * TLD table array.
+ *
+ * Return value: Return structure corresponding to TLD @tld by going
+ * thru @tables, or return %NULL if no such structure is found.
+ */
+const Tld_table *
+tld_get_table (const char *tld, const Tld_table ** tables)
+{
+ const Tld_table **tldtable = NULL;
+
+ if (!tld || !tables)
+ return NULL;
+
+ for (tldtable = tables; *tldtable; tldtable++)
+ if (!strcmp ((*tldtable)->name, tld))
+ return *tldtable;
+
+ return NULL;
+}
+
+/**
+ * tld_default_table - get table for a TLD name
+ * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string.
+ * @overrides: Additional zero terminated array of #Tld_table
+ * info-structures for TLDs, or %NULL to only use library deault
+ * tables.
+ *
+ * Get the TLD table for a named TLD, using the internal defaults,
+ * possibly overrided by the (optional) supplied tables.
+ *
+ * Return value: Return structure corresponding to TLD @tld_str, first
+ * looking through @overrides then thru built-in list, or %NULL if
+ * no such structure found.
+ */
+const Tld_table *
+tld_default_table (const char *tld, const Tld_table ** overrides)
+{
+ const Tld_table *tldtable = NULL;
+
+ if (!tld)
+ return NULL;
+
+ if (overrides)
+ tldtable = tld_get_table (tld, overrides);
+
+ if (!tldtable)
+ tldtable = tld_get_table (tld, _tld_tables);
+
+ return tldtable;
+}
+
+#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \
+ (c) == 0xFF0E || (c) == 0xFF61)
+
+/**
+ * tld_get_4 - extract top level domain part in input Unicode string
+ * @in: Array of unicode code points to process. Does not need to be
+ * zero terminated.
+ * @inlen: Number of unicode code points.
+ * @out: Zero terminated ascii result string pointer.
+ *
+ * Isolate the top-level domain of @in and return it as an ASCII
+ * string in @out.
+ *
+ * Return value: Return %TLD_SUCCESS on success, or the corresponding
+ * #Tld_rc error code otherwise.
+ */
+int
+tld_get_4 (const uint32_t * in, size_t inlen, char **out)
+{
+ const uint32_t *ipos;
+ size_t olen;
+
+ *out = NULL;
+ if (!in || inlen == 0)
+ return TLD_NODATA;
+
+ ipos = &in[inlen - 1];
+ olen = 0;
+ /* Scan backwards for non(latin)letters. */
+ while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) ||
+ (*ipos >= 0x61 && *ipos <= 0x7A)))
+ ipos--, olen++;
+
+ if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */
+ {
+ char *out_s = malloc (sizeof (char) * (olen + 1));
+ char *opos = out_s;
+
+ if (!opos)
+ return TLD_MALLOC_ERROR;
+
+ ipos++;
+ /* Transcribe to lowercase ascii string. */
+ for (; ipos < &in[inlen]; ipos++, opos++)
+ *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20;
+ *opos = 0;
+ *out = out_s;
+ return TLD_SUCCESS;
+ }
+
+ return TLD_NO_TLD;
+}
+
+/**
+ * tld_get_4z - extract top level domain part in input Unicode string
+ * @in: Zero terminated array of unicode code points to process.
+ * @out: Zero terminated ascii result string pointer.
+ *
+ * Isolate the top-level domain of @in and return it as an ASCII
+ * string in @out.
+ *
+ * Return value: Return %TLD_SUCCESS on success, or the corresponding
+ * #Tld_rc error code otherwise.
+ */
+int
+tld_get_4z (const uint32_t * in, char **out)
+{
+ const uint32_t *ipos = in;
+
+ if (!in)
+ return TLD_NODATA;
+
+ while (*ipos)
+ ipos++;
+
+ return tld_get_4 (in, ipos - in, out);
+}
+
+/**
+ * tld_get_z - extract top level domain part in input string
+ * @in: Zero terminated character array to process.
+ * @out: Zero terminated ascii result string pointer.
+ *
+ * Isolate the top-level domain of @in and return it as an ASCII
+ * string in @out. The input string @in may be UTF-8, ISO-8859-1 or
+ * any ASCII compatible character encoding.
+ *
+ * Return value: Return %TLD_SUCCESS on success, or the corresponding
+ * #Tld_rc error code otherwise.
+ */
+int
+tld_get_z (const char *in, char **out)
+{
+ uint32_t *iucs;
+ size_t i, ilen;
+ int rc;
+
+ ilen = strlen (in);
+ iucs = calloc (ilen, sizeof (*iucs));
+
+ if (!iucs)
+ return TLD_MALLOC_ERROR;
+
+ for (i = 0; i < ilen; i++)
+ iucs[i] = in[i];
+
+ rc = tld_get_4 (iucs, ilen, out);
+
+ free (iucs);
+
+ return rc;
+}
+
+/*
+ * tld_checkchar - verify that character is permitted
+ * @ch: 32 bit unicode character to check.
+ * @tld: A #Tld_table data structure to check @ch against.
+ *
+ * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid
+ * character in @tld.
+ *
+ * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a
+ * valid character for the TLD @tld or if @tld is %NULL,
+ * %TLD_INVALID if @ch is invalid as defined by @tld.
+ */
+static int
+_tld_checkchar (uint32_t ch, const Tld_table * tld)
+{
+ const Tld_table_element *s, *e, *m;
+
+ if (!tld)
+ return TLD_SUCCESS;
+
+ /* Check for [-a-z0-9.]. */
+ if ((ch >= 0x61 && ch <= 0x7A) ||
+ (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch))
+ return TLD_SUCCESS;
+
+ s = tld->valid;
+ e = s + tld->nvalid;
+ while (s < e)
+ {
+ m = s + ((e - s) >> 1);
+ if (ch < m->start)
+ e = m;
+ else if (ch > m->end)
+ s = m + 1;
+ else
+ return TLD_SUCCESS;
+ }
+
+ return TLD_INVALID;
+}
+
+/**
+ * tld_check_4t - verify that characters are permitted
+ * @in: Array of unicode code points to process. Does not need to be
+ * zero terminated.
+ * @inlen: Number of unicode code points.
+ * @errpos: Position of offending character is returned here.
+ * @tld: A #Tld_table data structure representing the restrictions for
+ * which the input should be tested.
+ *
+ * Test each of the code points in @in for whether or not
+ * they are allowed by the data structure in @tld, return
+ * the position of the first character for which this is not
+ * the case in @errpos.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
+ * points are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos,
+ const Tld_table * tld)
+{
+ const uint32_t *ipos;
+ int rc;
+
+ if (!tld) /* No data for TLD so everything is valid. */
+ return TLD_SUCCESS;
+
+ ipos = in;
+ while (ipos < &in[inlen])
+ {
+ rc = _tld_checkchar (*ipos, tld);
+ if (rc != TLD_SUCCESS)
+ {
+ if (errpos)
+ *errpos = ipos - in;
+ return rc;
+ }
+ ipos++;
+ }
+ return TLD_SUCCESS;
+}
+
+/**
+ * tld_check_4tz - verify that characters are permitted
+ * @in: Zero terminated array of unicode code points to process.
+ * @errpos: Position of offending character is returned here.
+ * @tld: A #Tld_table data structure representing the restrictions for
+ * which the input should be tested.
+ *
+ * Test each of the code points in @in for whether or not
+ * they are allowed by the data structure in @tld, return
+ * the position of the first character for which this is not
+ * the case in @errpos.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
+ * points are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld)
+{
+ const uint32_t *ipos = in;
+
+ if (!ipos)
+ return TLD_NODATA;
+
+ while (*ipos)
+ ipos++;
+
+ return tld_check_4t (in, ipos - in, errpos, tld);
+}
+
+/**
+ * tld_check_4 - verify that characters are permitted
+ * @in: Array of unicode code points to process. Does not need to be
+ * zero terminated.
+ * @inlen: Number of unicode code points.
+ * @errpos: Position of offending character is returned here.
+ * @overrides: A #Tld_table array of additional domain restriction
+ * structures that complement and supersede the built-in information.
+ *
+ * Test each of the code points in @in for whether or not they are
+ * allowed by the information in @overrides or by the built-in TLD
+ * restriction data. When data for the same TLD is available both
+ * internally and in @overrides, the information in @overrides takes
+ * precedence. If several entries for a specific TLD are found, the
+ * first one is used. If @overrides is %NULL, only the built-in
+ * information is used. The position of the first offending character
+ * is returned in @errpos.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
+ * points are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos,
+ const Tld_table ** overrides)
+{
+ const Tld_table *tld;
+ char *domain;
+ int rc;
+
+ if (errpos)
+ *errpos = 0;
+
+ /* Get TLD name. */
+ rc = tld_get_4 (in, inlen, &domain);
+
+ if (rc != TLD_SUCCESS)
+ {
+ if (rc == TLD_NO_TLD) /* No TLD, say OK */
+ return TLD_SUCCESS;
+ else
+ return rc;
+ }
+
+ /* Retrieve appropriate data structure. */
+ tld = tld_default_table (domain, overrides);
+ free (domain);
+
+ return tld_check_4t (in, inlen, errpos, tld);
+}
+
+/**
+ * tld_check_4z - verify that characters are permitted
+ * @in: Zero-terminated array of unicode code points to process.
+ * @errpos: Position of offending character is returned here.
+ * @overrides: A #Tld_table array of additional domain restriction
+ * structures that complement and supersede the built-in information.
+ *
+ * Test each of the code points in @in for whether or not they are
+ * allowed by the information in @overrides or by the built-in TLD
+ * restriction data. When data for the same TLD is available both
+ * internally and in @overrides, the information in @overrides takes
+ * precedence. If several entries for a specific TLD are found, the
+ * first one is used. If @overrides is %NULL, only the built-in
+ * information is used. The position of the first offending character
+ * is returned in @errpos.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code
+ * points are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_4z (const uint32_t * in, size_t * errpos,
+ const Tld_table ** overrides)
+{
+ const uint32_t *ipos = in;
+
+ if (!ipos)
+ return TLD_NODATA;
+
+ while (*ipos)
+ ipos++;
+
+ return tld_check_4 (in, ipos - in, errpos, overrides);
+}
+
+/**
+ * tld_check_8z - verify that characters are permitted
+ * @in: Zero-terminated UTF8 string to process.
+ * @errpos: Position of offending character is returned here.
+ * @overrides: A #Tld_table array of additional domain restriction
+ * structures that complement and supersede the built-in information.
+ *
+ * Test each of the characters in @in for whether or not they are
+ * allowed by the information in @overrides or by the built-in TLD
+ * restriction data. When data for the same TLD is available both
+ * internally and in @overrides, the information in @overrides takes
+ * precedence. If several entries for a specific TLD are found, the
+ * first one is used. If @overrides is %NULL, only the built-in
+ * information is used. The position of the first offending character
+ * is returned in @errpos. Note that the error position refers to the
+ * decoded character offset rather than the byte position in the
+ * string.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all
+ * characters are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides)
+{
+ uint32_t *iucs;
+ size_t ilen;
+ int rc;
+
+ if (!in)
+ return TLD_NODATA;
+
+ iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen);
+
+ if (!iucs)
+ return TLD_MALLOC_ERROR;
+
+ rc = tld_check_4 (iucs, ilen, errpos, overrides);
+
+ free (iucs);
+
+ return rc;
+}
+
+/**
+ * tld_check_lz - verify that characters are permitted
+ * @in: Zero-terminated string in the current locales encoding to process.
+ * @errpos: Position of offending character is returned here.
+ * @overrides: A #Tld_table array of additional domain restriction
+ * structures that complement and supersede the built-in information.
+ *
+ * Test each of the characters in @in for whether or not they are
+ * allowed by the information in @overrides or by the built-in TLD
+ * restriction data. When data for the same TLD is available both
+ * internally and in @overrides, the information in @overrides takes
+ * precedence. If several entries for a specific TLD are found, the
+ * first one is used. If @overrides is %NULL, only the built-in
+ * information is used. The position of the first offending character
+ * is returned in @errpos. Note that the error position refers to the
+ * decoded character offset rather than the byte position in the
+ * string.
+ *
+ * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all
+ * characters are valid or when @tld is null, %TLD_INVALID if a
+ * character is not allowed, or additional error codes on general
+ * failure conditions.
+ */
+int
+tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides)
+{
+ char *utf8;
+ int rc;
+
+ if (!in)
+ return TLD_NODATA;
+
+ utf8 = stringprep_locale_to_utf8 (in);
+ if (!utf8)
+ return TLD_ICONV_ERROR;
+
+
+ rc = tld_check_8z (utf8, errpos, overrides);
+
+ free (utf8);
+
+ return rc;
+}
+
+/**
+ * Tld_rc:
+ * @TLD_SUCCESS: Successful operation. This value is guaranteed to
+ * always be zero, the remaining ones are only guaranteed to hold
+ * non-zero values, for logical comparison purposes.
+ * @TLD_INVALID: Invalid character found.
+ * @TLD_NODATA: No input data was provided.
+ * @TLD_MALLOC_ERROR: Error during memory allocation.
+ * @TLD_ICONV_ERROR: Error during iconv string conversion.
+ * @TLD_NO_TLD: No top-level domain found in domain string.
+ *
+ * Enumerated return codes of the TLD checking functions.
+ * The value 0 is guaranteed to always correspond to success.
+ */