diff options
author | denplusplus <denplusplus@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:34 +0300 |
commit | 57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a (patch) | |
tree | cc63639f8e502db19a82c20e2861c6d1edbf9fea /contrib/libs/libidn/tld.c | |
parent | 464ba3814a83db4f2d5327393b0b6eaf0c86bfd7 (diff) | |
download | ydb-57c20d143e8a438cd76b9fdc3ca2e8ee3ac1f32a.tar.gz |
Restoring authorship annotation for <denplusplus@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/libidn/tld.c')
-rw-r--r-- | contrib/libs/libidn/tld.c | 1028 |
1 files changed, 514 insertions, 514 deletions
diff --git a/contrib/libs/libidn/tld.c b/contrib/libs/libidn/tld.c index 4e894663fb..88a26b54ca 100644 --- a/contrib/libs/libidn/tld.c +++ b/contrib/libs/libidn/tld.c @@ -1,514 +1,514 @@ -/* tld.c --- Handle TLD restriction checking. - * Copyright (C) 2004, 2005, 2006, 2007 Simon Josefsson. - * Copyright (C) 2003, 2004 Free Software Foundation, Inc. - * - * Author: Thomas Jacob, Internet24.de - * - * This file is part of GNU Libidn. - * - * GNU Libidn is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * GNU Libidn is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GNU Libidn; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - * - */ - -/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */ -#include <stringprep.h> - -/* Get strcmp(). */ -#include <string.h> - -/* Get specifications. */ -#include <tld.h> - -/* Array of built-in domain restriction structures. See tlds.c. */ -extern const Tld_table *_tld_tables[]; - -/** - * tld_get_table - get table for a TLD name in table - * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string. - * @tables: Zero terminated array of #Tld_table info-structures for - * TLDs. - * - * Get the TLD table for a named TLD by searching through the given - * TLD table array. - * - * Return value: Return structure corresponding to TLD @tld by going - * thru @tables, or return %NULL if no such structure is found. - */ -const Tld_table * -tld_get_table (const char *tld, const Tld_table ** tables) -{ - const Tld_table **tldtable = NULL; - - if (!tld || !tables) - return NULL; - - for (tldtable = tables; *tldtable; tldtable++) - if (!strcmp ((*tldtable)->name, tld)) - return *tldtable; - - return NULL; -} - -/** - * tld_default_table - get table for a TLD name - * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string. - * @overrides: Additional zero terminated array of #Tld_table - * info-structures for TLDs, or %NULL to only use library deault - * tables. - * - * Get the TLD table for a named TLD, using the internal defaults, - * possibly overrided by the (optional) supplied tables. - * - * Return value: Return structure corresponding to TLD @tld_str, first - * looking through @overrides then thru built-in list, or %NULL if - * no such structure found. - */ -const Tld_table * -tld_default_table (const char *tld, const Tld_table ** overrides) -{ - const Tld_table *tldtable = NULL; - - if (!tld) - return NULL; - - if (overrides) - tldtable = tld_get_table (tld, overrides); - - if (!tldtable) - tldtable = tld_get_table (tld, _tld_tables); - - return tldtable; -} - -#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \ - (c) == 0xFF0E || (c) == 0xFF61) - -/** - * tld_get_4 - extract top level domain part in input Unicode string - * @in: Array of unicode code points to process. Does not need to be - * zero terminated. - * @inlen: Number of unicode code points. - * @out: Zero terminated ascii result string pointer. - * - * Isolate the top-level domain of @in and return it as an ASCII - * string in @out. - * - * Return value: Return %TLD_SUCCESS on success, or the corresponding - * #Tld_rc error code otherwise. - */ -int -tld_get_4 (const uint32_t * in, size_t inlen, char **out) -{ - const uint32_t *ipos; - size_t olen; - - *out = NULL; - if (!in || inlen == 0) - return TLD_NODATA; - - ipos = &in[inlen - 1]; - olen = 0; - /* Scan backwards for non(latin)letters. */ - while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) || - (*ipos >= 0x61 && *ipos <= 0x7A))) - ipos--, olen++; - - if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */ - { - char *out_s = malloc (sizeof (char) * (olen + 1)); - char *opos = out_s; - - if (!opos) - return TLD_MALLOC_ERROR; - - ipos++; - /* Transcribe to lowercase ascii string. */ - for (; ipos < &in[inlen]; ipos++, opos++) - *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20; - *opos = 0; - *out = out_s; - return TLD_SUCCESS; - } - - return TLD_NO_TLD; -} - -/** - * tld_get_4z - extract top level domain part in input Unicode string - * @in: Zero terminated array of unicode code points to process. - * @out: Zero terminated ascii result string pointer. - * - * Isolate the top-level domain of @in and return it as an ASCII - * string in @out. - * - * Return value: Return %TLD_SUCCESS on success, or the corresponding - * #Tld_rc error code otherwise. - */ -int -tld_get_4z (const uint32_t * in, char **out) -{ - const uint32_t *ipos = in; - - if (!in) - return TLD_NODATA; - - while (*ipos) - ipos++; - - return tld_get_4 (in, ipos - in, out); -} - -/** - * tld_get_z - extract top level domain part in input string - * @in: Zero terminated character array to process. - * @out: Zero terminated ascii result string pointer. - * - * Isolate the top-level domain of @in and return it as an ASCII - * string in @out. The input string @in may be UTF-8, ISO-8859-1 or - * any ASCII compatible character encoding. - * - * Return value: Return %TLD_SUCCESS on success, or the corresponding - * #Tld_rc error code otherwise. - */ -int -tld_get_z (const char *in, char **out) -{ - uint32_t *iucs; - size_t i, ilen; - int rc; - - ilen = strlen (in); - iucs = calloc (ilen, sizeof (*iucs)); - - if (!iucs) - return TLD_MALLOC_ERROR; - - for (i = 0; i < ilen; i++) - iucs[i] = in[i]; - - rc = tld_get_4 (iucs, ilen, out); - - free (iucs); - - return rc; -} - -/* - * tld_checkchar - verify that character is permitted - * @ch: 32 bit unicode character to check. - * @tld: A #Tld_table data structure to check @ch against. - * - * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid - * character in @tld. - * - * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a - * valid character for the TLD @tld or if @tld is %NULL, - * %TLD_INVALID if @ch is invalid as defined by @tld. - */ -static int -_tld_checkchar (uint32_t ch, const Tld_table * tld) -{ - const Tld_table_element *s, *e, *m; - - if (!tld) - return TLD_SUCCESS; - - /* Check for [-a-z0-9.]. */ - if ((ch >= 0x61 && ch <= 0x7A) || - (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch)) - return TLD_SUCCESS; - - s = tld->valid; - e = s + tld->nvalid; - while (s < e) - { - m = s + ((e - s) >> 1); - if (ch < m->start) - e = m; - else if (ch > m->end) - s = m + 1; - else - return TLD_SUCCESS; - } - - return TLD_INVALID; -} - -/** - * tld_check_4t - verify that characters are permitted - * @in: Array of unicode code points to process. Does not need to be - * zero terminated. - * @inlen: Number of unicode code points. - * @errpos: Position of offending character is returned here. - * @tld: A #Tld_table data structure representing the restrictions for - * which the input should be tested. - * - * Test each of the code points in @in for whether or not - * they are allowed by the data structure in @tld, return - * the position of the first character for which this is not - * the case in @errpos. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code - * points are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos, - const Tld_table * tld) -{ - const uint32_t *ipos; - int rc; - - if (!tld) /* No data for TLD so everything is valid. */ - return TLD_SUCCESS; - - ipos = in; - while (ipos < &in[inlen]) - { - rc = _tld_checkchar (*ipos, tld); - if (rc != TLD_SUCCESS) - { - if (errpos) - *errpos = ipos - in; - return rc; - } - ipos++; - } - return TLD_SUCCESS; -} - -/** - * tld_check_4tz - verify that characters are permitted - * @in: Zero terminated array of unicode code points to process. - * @errpos: Position of offending character is returned here. - * @tld: A #Tld_table data structure representing the restrictions for - * which the input should be tested. - * - * Test each of the code points in @in for whether or not - * they are allowed by the data structure in @tld, return - * the position of the first character for which this is not - * the case in @errpos. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code - * points are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld) -{ - const uint32_t *ipos = in; - - if (!ipos) - return TLD_NODATA; - - while (*ipos) - ipos++; - - return tld_check_4t (in, ipos - in, errpos, tld); -} - -/** - * tld_check_4 - verify that characters are permitted - * @in: Array of unicode code points to process. Does not need to be - * zero terminated. - * @inlen: Number of unicode code points. - * @errpos: Position of offending character is returned here. - * @overrides: A #Tld_table array of additional domain restriction - * structures that complement and supersede the built-in information. - * - * Test each of the code points in @in for whether or not they are - * allowed by the information in @overrides or by the built-in TLD - * restriction data. When data for the same TLD is available both - * internally and in @overrides, the information in @overrides takes - * precedence. If several entries for a specific TLD are found, the - * first one is used. If @overrides is %NULL, only the built-in - * information is used. The position of the first offending character - * is returned in @errpos. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code - * points are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos, - const Tld_table ** overrides) -{ - const Tld_table *tld; - char *domain; - int rc; - - if (errpos) - *errpos = 0; - - /* Get TLD name. */ - rc = tld_get_4 (in, inlen, &domain); - - if (rc != TLD_SUCCESS) - { - if (rc == TLD_NO_TLD) /* No TLD, say OK */ - return TLD_SUCCESS; - else - return rc; - } - - /* Retrieve appropriate data structure. */ - tld = tld_default_table (domain, overrides); - free (domain); - - return tld_check_4t (in, inlen, errpos, tld); -} - -/** - * tld_check_4z - verify that characters are permitted - * @in: Zero-terminated array of unicode code points to process. - * @errpos: Position of offending character is returned here. - * @overrides: A #Tld_table array of additional domain restriction - * structures that complement and supersede the built-in information. - * - * Test each of the code points in @in for whether or not they are - * allowed by the information in @overrides or by the built-in TLD - * restriction data. When data for the same TLD is available both - * internally and in @overrides, the information in @overrides takes - * precedence. If several entries for a specific TLD are found, the - * first one is used. If @overrides is %NULL, only the built-in - * information is used. The position of the first offending character - * is returned in @errpos. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code - * points are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_4z (const uint32_t * in, size_t * errpos, - const Tld_table ** overrides) -{ - const uint32_t *ipos = in; - - if (!ipos) - return TLD_NODATA; - - while (*ipos) - ipos++; - - return tld_check_4 (in, ipos - in, errpos, overrides); -} - -/** - * tld_check_8z - verify that characters are permitted - * @in: Zero-terminated UTF8 string to process. - * @errpos: Position of offending character is returned here. - * @overrides: A #Tld_table array of additional domain restriction - * structures that complement and supersede the built-in information. - * - * Test each of the characters in @in for whether or not they are - * allowed by the information in @overrides or by the built-in TLD - * restriction data. When data for the same TLD is available both - * internally and in @overrides, the information in @overrides takes - * precedence. If several entries for a specific TLD are found, the - * first one is used. If @overrides is %NULL, only the built-in - * information is used. The position of the first offending character - * is returned in @errpos. Note that the error position refers to the - * decoded character offset rather than the byte position in the - * string. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all - * characters are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides) -{ - uint32_t *iucs; - size_t ilen; - int rc; - - if (!in) - return TLD_NODATA; - - iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen); - - if (!iucs) - return TLD_MALLOC_ERROR; - - rc = tld_check_4 (iucs, ilen, errpos, overrides); - - free (iucs); - - return rc; -} - -/** - * tld_check_lz - verify that characters are permitted - * @in: Zero-terminated string in the current locales encoding to process. - * @errpos: Position of offending character is returned here. - * @overrides: A #Tld_table array of additional domain restriction - * structures that complement and supersede the built-in information. - * - * Test each of the characters in @in for whether or not they are - * allowed by the information in @overrides or by the built-in TLD - * restriction data. When data for the same TLD is available both - * internally and in @overrides, the information in @overrides takes - * precedence. If several entries for a specific TLD are found, the - * first one is used. If @overrides is %NULL, only the built-in - * information is used. The position of the first offending character - * is returned in @errpos. Note that the error position refers to the - * decoded character offset rather than the byte position in the - * string. - * - * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all - * characters are valid or when @tld is null, %TLD_INVALID if a - * character is not allowed, or additional error codes on general - * failure conditions. - */ -int -tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides) -{ - char *utf8; - int rc; - - if (!in) - return TLD_NODATA; - - utf8 = stringprep_locale_to_utf8 (in); - if (!utf8) - return TLD_ICONV_ERROR; - - - rc = tld_check_8z (utf8, errpos, overrides); - - free (utf8); - - return rc; -} - -/** - * Tld_rc: - * @TLD_SUCCESS: Successful operation. This value is guaranteed to - * always be zero, the remaining ones are only guaranteed to hold - * non-zero values, for logical comparison purposes. - * @TLD_INVALID: Invalid character found. - * @TLD_NODATA: No input data was provided. - * @TLD_MALLOC_ERROR: Error during memory allocation. - * @TLD_ICONV_ERROR: Error during iconv string conversion. - * @TLD_NO_TLD: No top-level domain found in domain string. - * - * Enumerated return codes of the TLD checking functions. - * The value 0 is guaranteed to always correspond to success. - */ +/* tld.c --- Handle TLD restriction checking. + * Copyright (C) 2004, 2005, 2006, 2007 Simon Josefsson. + * Copyright (C) 2003, 2004 Free Software Foundation, Inc. + * + * Author: Thomas Jacob, Internet24.de + * + * This file is part of GNU Libidn. + * + * GNU Libidn is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * GNU Libidn is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with GNU Libidn; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + */ + +/* Get stringprep_utf8_to_ucs4, stringprep_locale_to_utf8. */ +#include <stringprep.h> + +/* Get strcmp(). */ +#include <string.h> + +/* Get specifications. */ +#include <tld.h> + +/* Array of built-in domain restriction structures. See tlds.c. */ +extern const Tld_table *_tld_tables[]; + +/** + * tld_get_table - get table for a TLD name in table + * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string. + * @tables: Zero terminated array of #Tld_table info-structures for + * TLDs. + * + * Get the TLD table for a named TLD by searching through the given + * TLD table array. + * + * Return value: Return structure corresponding to TLD @tld by going + * thru @tables, or return %NULL if no such structure is found. + */ +const Tld_table * +tld_get_table (const char *tld, const Tld_table ** tables) +{ + const Tld_table **tldtable = NULL; + + if (!tld || !tables) + return NULL; + + for (tldtable = tables; *tldtable; tldtable++) + if (!strcmp ((*tldtable)->name, tld)) + return *tldtable; + + return NULL; +} + +/** + * tld_default_table - get table for a TLD name + * @tld: TLD name (e.g. "com") as zero terminated ASCII byte string. + * @overrides: Additional zero terminated array of #Tld_table + * info-structures for TLDs, or %NULL to only use library deault + * tables. + * + * Get the TLD table for a named TLD, using the internal defaults, + * possibly overrided by the (optional) supplied tables. + * + * Return value: Return structure corresponding to TLD @tld_str, first + * looking through @overrides then thru built-in list, or %NULL if + * no such structure found. + */ +const Tld_table * +tld_default_table (const char *tld, const Tld_table ** overrides) +{ + const Tld_table *tldtable = NULL; + + if (!tld) + return NULL; + + if (overrides) + tldtable = tld_get_table (tld, overrides); + + if (!tldtable) + tldtable = tld_get_table (tld, _tld_tables); + + return tldtable; +} + +#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 || \ + (c) == 0xFF0E || (c) == 0xFF61) + +/** + * tld_get_4 - extract top level domain part in input Unicode string + * @in: Array of unicode code points to process. Does not need to be + * zero terminated. + * @inlen: Number of unicode code points. + * @out: Zero terminated ascii result string pointer. + * + * Isolate the top-level domain of @in and return it as an ASCII + * string in @out. + * + * Return value: Return %TLD_SUCCESS on success, or the corresponding + * #Tld_rc error code otherwise. + */ +int +tld_get_4 (const uint32_t * in, size_t inlen, char **out) +{ + const uint32_t *ipos; + size_t olen; + + *out = NULL; + if (!in || inlen == 0) + return TLD_NODATA; + + ipos = &in[inlen - 1]; + olen = 0; + /* Scan backwards for non(latin)letters. */ + while (ipos >= in && ((*ipos >= 0x41 && *ipos <= 0x5A) || + (*ipos >= 0x61 && *ipos <= 0x7A))) + ipos--, olen++; + + if (olen > 0 && DOTP (*ipos)) /* Found something that appears a TLD. */ + { + char *out_s = malloc (sizeof (char) * (olen + 1)); + char *opos = out_s; + + if (!opos) + return TLD_MALLOC_ERROR; + + ipos++; + /* Transcribe to lowercase ascii string. */ + for (; ipos < &in[inlen]; ipos++, opos++) + *opos = *ipos > 0x5A ? *ipos : *ipos + 0x20; + *opos = 0; + *out = out_s; + return TLD_SUCCESS; + } + + return TLD_NO_TLD; +} + +/** + * tld_get_4z - extract top level domain part in input Unicode string + * @in: Zero terminated array of unicode code points to process. + * @out: Zero terminated ascii result string pointer. + * + * Isolate the top-level domain of @in and return it as an ASCII + * string in @out. + * + * Return value: Return %TLD_SUCCESS on success, or the corresponding + * #Tld_rc error code otherwise. + */ +int +tld_get_4z (const uint32_t * in, char **out) +{ + const uint32_t *ipos = in; + + if (!in) + return TLD_NODATA; + + while (*ipos) + ipos++; + + return tld_get_4 (in, ipos - in, out); +} + +/** + * tld_get_z - extract top level domain part in input string + * @in: Zero terminated character array to process. + * @out: Zero terminated ascii result string pointer. + * + * Isolate the top-level domain of @in and return it as an ASCII + * string in @out. The input string @in may be UTF-8, ISO-8859-1 or + * any ASCII compatible character encoding. + * + * Return value: Return %TLD_SUCCESS on success, or the corresponding + * #Tld_rc error code otherwise. + */ +int +tld_get_z (const char *in, char **out) +{ + uint32_t *iucs; + size_t i, ilen; + int rc; + + ilen = strlen (in); + iucs = calloc (ilen, sizeof (*iucs)); + + if (!iucs) + return TLD_MALLOC_ERROR; + + for (i = 0; i < ilen; i++) + iucs[i] = in[i]; + + rc = tld_get_4 (iucs, ilen, out); + + free (iucs); + + return rc; +} + +/* + * tld_checkchar - verify that character is permitted + * @ch: 32 bit unicode character to check. + * @tld: A #Tld_table data structure to check @ch against. + * + * Verify if @ch is either in [a-z0-9-.] or mentioned as a valid + * character in @tld. + * + * Return value: Return the #Tld_rc value %TLD_SUCCESS if @ch is a + * valid character for the TLD @tld or if @tld is %NULL, + * %TLD_INVALID if @ch is invalid as defined by @tld. + */ +static int +_tld_checkchar (uint32_t ch, const Tld_table * tld) +{ + const Tld_table_element *s, *e, *m; + + if (!tld) + return TLD_SUCCESS; + + /* Check for [-a-z0-9.]. */ + if ((ch >= 0x61 && ch <= 0x7A) || + (ch >= 0x30 && ch <= 0x39) || ch == 0x2D || DOTP (ch)) + return TLD_SUCCESS; + + s = tld->valid; + e = s + tld->nvalid; + while (s < e) + { + m = s + ((e - s) >> 1); + if (ch < m->start) + e = m; + else if (ch > m->end) + s = m + 1; + else + return TLD_SUCCESS; + } + + return TLD_INVALID; +} + +/** + * tld_check_4t - verify that characters are permitted + * @in: Array of unicode code points to process. Does not need to be + * zero terminated. + * @inlen: Number of unicode code points. + * @errpos: Position of offending character is returned here. + * @tld: A #Tld_table data structure representing the restrictions for + * which the input should be tested. + * + * Test each of the code points in @in for whether or not + * they are allowed by the data structure in @tld, return + * the position of the first character for which this is not + * the case in @errpos. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code + * points are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_4t (const uint32_t * in, size_t inlen, size_t * errpos, + const Tld_table * tld) +{ + const uint32_t *ipos; + int rc; + + if (!tld) /* No data for TLD so everything is valid. */ + return TLD_SUCCESS; + + ipos = in; + while (ipos < &in[inlen]) + { + rc = _tld_checkchar (*ipos, tld); + if (rc != TLD_SUCCESS) + { + if (errpos) + *errpos = ipos - in; + return rc; + } + ipos++; + } + return TLD_SUCCESS; +} + +/** + * tld_check_4tz - verify that characters are permitted + * @in: Zero terminated array of unicode code points to process. + * @errpos: Position of offending character is returned here. + * @tld: A #Tld_table data structure representing the restrictions for + * which the input should be tested. + * + * Test each of the code points in @in for whether or not + * they are allowed by the data structure in @tld, return + * the position of the first character for which this is not + * the case in @errpos. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code + * points are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_4tz (const uint32_t * in, size_t * errpos, const Tld_table * tld) +{ + const uint32_t *ipos = in; + + if (!ipos) + return TLD_NODATA; + + while (*ipos) + ipos++; + + return tld_check_4t (in, ipos - in, errpos, tld); +} + +/** + * tld_check_4 - verify that characters are permitted + * @in: Array of unicode code points to process. Does not need to be + * zero terminated. + * @inlen: Number of unicode code points. + * @errpos: Position of offending character is returned here. + * @overrides: A #Tld_table array of additional domain restriction + * structures that complement and supersede the built-in information. + * + * Test each of the code points in @in for whether or not they are + * allowed by the information in @overrides or by the built-in TLD + * restriction data. When data for the same TLD is available both + * internally and in @overrides, the information in @overrides takes + * precedence. If several entries for a specific TLD are found, the + * first one is used. If @overrides is %NULL, only the built-in + * information is used. The position of the first offending character + * is returned in @errpos. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code + * points are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_4 (const uint32_t * in, size_t inlen, size_t * errpos, + const Tld_table ** overrides) +{ + const Tld_table *tld; + char *domain; + int rc; + + if (errpos) + *errpos = 0; + + /* Get TLD name. */ + rc = tld_get_4 (in, inlen, &domain); + + if (rc != TLD_SUCCESS) + { + if (rc == TLD_NO_TLD) /* No TLD, say OK */ + return TLD_SUCCESS; + else + return rc; + } + + /* Retrieve appropriate data structure. */ + tld = tld_default_table (domain, overrides); + free (domain); + + return tld_check_4t (in, inlen, errpos, tld); +} + +/** + * tld_check_4z - verify that characters are permitted + * @in: Zero-terminated array of unicode code points to process. + * @errpos: Position of offending character is returned here. + * @overrides: A #Tld_table array of additional domain restriction + * structures that complement and supersede the built-in information. + * + * Test each of the code points in @in for whether or not they are + * allowed by the information in @overrides or by the built-in TLD + * restriction data. When data for the same TLD is available both + * internally and in @overrides, the information in @overrides takes + * precedence. If several entries for a specific TLD are found, the + * first one is used. If @overrides is %NULL, only the built-in + * information is used. The position of the first offending character + * is returned in @errpos. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all code + * points are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_4z (const uint32_t * in, size_t * errpos, + const Tld_table ** overrides) +{ + const uint32_t *ipos = in; + + if (!ipos) + return TLD_NODATA; + + while (*ipos) + ipos++; + + return tld_check_4 (in, ipos - in, errpos, overrides); +} + +/** + * tld_check_8z - verify that characters are permitted + * @in: Zero-terminated UTF8 string to process. + * @errpos: Position of offending character is returned here. + * @overrides: A #Tld_table array of additional domain restriction + * structures that complement and supersede the built-in information. + * + * Test each of the characters in @in for whether or not they are + * allowed by the information in @overrides or by the built-in TLD + * restriction data. When data for the same TLD is available both + * internally and in @overrides, the information in @overrides takes + * precedence. If several entries for a specific TLD are found, the + * first one is used. If @overrides is %NULL, only the built-in + * information is used. The position of the first offending character + * is returned in @errpos. Note that the error position refers to the + * decoded character offset rather than the byte position in the + * string. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all + * characters are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_8z (const char *in, size_t * errpos, const Tld_table ** overrides) +{ + uint32_t *iucs; + size_t ilen; + int rc; + + if (!in) + return TLD_NODATA; + + iucs = stringprep_utf8_to_ucs4 (in, -1, &ilen); + + if (!iucs) + return TLD_MALLOC_ERROR; + + rc = tld_check_4 (iucs, ilen, errpos, overrides); + + free (iucs); + + return rc; +} + +/** + * tld_check_lz - verify that characters are permitted + * @in: Zero-terminated string in the current locales encoding to process. + * @errpos: Position of offending character is returned here. + * @overrides: A #Tld_table array of additional domain restriction + * structures that complement and supersede the built-in information. + * + * Test each of the characters in @in for whether or not they are + * allowed by the information in @overrides or by the built-in TLD + * restriction data. When data for the same TLD is available both + * internally and in @overrides, the information in @overrides takes + * precedence. If several entries for a specific TLD are found, the + * first one is used. If @overrides is %NULL, only the built-in + * information is used. The position of the first offending character + * is returned in @errpos. Note that the error position refers to the + * decoded character offset rather than the byte position in the + * string. + * + * Return value: Returns the #Tld_rc value %TLD_SUCCESS if all + * characters are valid or when @tld is null, %TLD_INVALID if a + * character is not allowed, or additional error codes on general + * failure conditions. + */ +int +tld_check_lz (const char *in, size_t * errpos, const Tld_table ** overrides) +{ + char *utf8; + int rc; + + if (!in) + return TLD_NODATA; + + utf8 = stringprep_locale_to_utf8 (in); + if (!utf8) + return TLD_ICONV_ERROR; + + + rc = tld_check_8z (utf8, errpos, overrides); + + free (utf8); + + return rc; +} + +/** + * Tld_rc: + * @TLD_SUCCESS: Successful operation. This value is guaranteed to + * always be zero, the remaining ones are only guaranteed to hold + * non-zero values, for logical comparison purposes. + * @TLD_INVALID: Invalid character found. + * @TLD_NODATA: No input data was provided. + * @TLD_MALLOC_ERROR: Error during memory allocation. + * @TLD_ICONV_ERROR: Error during iconv string conversion. + * @TLD_NO_TLD: No top-level domain found in domain string. + * + * Enumerated return codes of the TLD checking functions. + * The value 0 is guaranteed to always correspond to success. + */ |