Update ICU to 70.1

author: romankoshelev <romankoshelev@yandex-team.com> 2023-08-09 20:07:20 +0300
committer: romankoshelev <romankoshelev@yandex-team.com> 2023-08-09 20:59:13 +0300
commit: fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch)
tree: f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/io
parent: bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff)
download: ydb-fd82fb12fb45e71a02c628e45b12c50c0dd0d308.tar.gz
6 files changed, 62 insertions, 20 deletions
diff --git a/contrib/libs/icu/io/ucln_io.cpp b/contrib/libs/icu/io/ucln_io.cpp
index 5cd367cfad..c1307b5d97 100644
--- a/contrib/libs/icu/io/ucln_io.cpp
+++ b/contrib/libs/icu/io/ucln_io.cpp
@@ -23,7 +23,7 @@
 #include "uassert.h"
 
 #ifndef U_IO_IMPLEMENTATION
-#error U_IO_IMPLEMENTATION not set - must be set for all ICU source files in io/ - see http://userguide.icu-project.org/howtouseicu
+#error U_IO_IMPLEMENTATION not set - must be set for all ICU source files in io/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
 #endif
 
 
@@ -69,4 +69,3 @@ void ucln_io_registerCleanup(ECleanupIOType type,
     ucln_registerAutomaticCleanup();
 #endif
 }
-
diff --git a/contrib/libs/icu/io/ufile.cpp b/contrib/libs/icu/io/ufile.cpp
index 1d8c221cdc..607601935c 100644
--- a/contrib/libs/icu/io/ufile.cpp
+++ b/contrib/libs/icu/io/ufile.cpp
@@ -40,6 +40,7 @@
 #include "unicode/ures.h"
 #include "unicode/ucnv.h"
 #include "unicode/ustring.h"
+#include "unicode/unistr.h"
 #include "cstring.h"
 #include "cmemory.h"
 
@@ -142,18 +143,42 @@ u_fopen(const char    *filename,
     return result; /* not a file leak */
 }
 
+// FILENAME_BUF_MAX represents the largest size that we are willing to use for a
+// stack-allocated buffer to contain a file name or path. If PATH_MAX (POSIX) or MAX_PATH
+// (Windows) are defined and are smaller than this we will use their defined value;
+// otherwise, we will use FILENAME_BUF_MAX for the stack-allocated buffer, and dynamically
+// allocate a buffer for any file name or path that is that length or longer.
+#define FILENAME_BUF_MAX 296
+#if defined PATH_MAX && PATH_MAX < FILENAME_BUF_MAX
+#define FILENAME_BUF_CAPACITY PATH_MAX
+#elif defined MAX_PATH && MAX_PATH < FILENAME_BUF_MAX
+#define FILENAME_BUF_CAPACITY MAX_PATH
+#else
+#define FILENAME_BUF_CAPACITY FILENAME_BUF_MAX
+#endif
+
 U_CAPI UFILE* U_EXPORT2
 u_fopen_u(const UChar   *filename,
         const char    *perm,
         const char    *locale,
         const char    *codepage)
 {
-    UFILE     *result;
-    char buffer[256];
-
-    u_austrcpy(buffer, filename);
+    UFILE *result;
+    char buffer[FILENAME_BUF_CAPACITY];
+    char *filenameBuffer = buffer;
+
+    icu::UnicodeString filenameString(true, filename, -1); // readonly aliasing, does not allocate memory
+    // extract with conversion to platform default codepage, return full length (not including 0 termination)
+    int32_t filenameLength = filenameString.extract(0, filenameString.length(), filenameBuffer, FILENAME_BUF_CAPACITY);
+    if (filenameLength >= FILENAME_BUF_CAPACITY) { // could not fit (with zero termination) in buffer
+        filenameBuffer = static_cast<char *>(uprv_malloc(++filenameLength)); // add one for zero termination
+        if (!filenameBuffer) {
+            return nullptr;
+        }
+        filenameString.extract(0, filenameString.length(), filenameBuffer, filenameLength);
+    }
 
-    result = u_fopen(buffer, perm, locale, codepage);
+    result = u_fopen(filenameBuffer, perm, locale, codepage);
 #if U_PLATFORM_USES_ONLY_WIN32_API
     /* Try Windows API _wfopen if the above fails. */
     if (!result) {
@@ -161,20 +186,25 @@ u_fopen_u(const UChar   *filename,
         wchar_t wperm[40] = {};
         size_t  retVal;
         mbstowcs_s(&retVal, wperm, UPRV_LENGTHOF(wperm), perm, _TRUNCATE);
-        FILE *systemFile = _wfopen((const wchar_t *)filename, wperm);
+        FILE *systemFile = _wfopen(reinterpret_cast<const wchar_t *>(filename), wperm); // may return NULL for long filename
         if (systemFile) {
             result = finit_owner(systemFile, locale, codepage, TRUE);
         }
-        if (!result) {
+        if (!result && systemFile) {
             /* Something bad happened.
-               Maybe the converter couldn't be opened. */
+               Maybe the converter couldn't be opened.
+               But do not fclose(systemFile) if systemFile is NULL. */
             fclose(systemFile);
         }
     }
 #endif
+    if (filenameBuffer != buffer) {
+        uprv_free(filenameBuffer);
+    }
     return result; /* not a file leak */
 }
 
+
 U_CAPI UFILE* U_EXPORT2
 u_fstropen(UChar *stringBuf,
            int32_t      capacity,
diff --git a/contrib/libs/icu/io/ufile.h b/contrib/libs/icu/io/ufile.h
index e85208fcd3..88fa40911e 100644
--- a/contrib/libs/icu/io/ufile.h
+++ b/contrib/libs/icu/io/ufile.h
@@ -74,7 +74,7 @@ struct UFILE {
 
     UChar       fUCBuffer[UFILE_UCHARBUFFER_SIZE];/* buffer used for toUnicode */
 
-    UBool       fOwnFile;       /* TRUE if fFile should be closed */
+    UBool       fOwnFile;       /* true if fFile should be closed */
 
     int32_t     fFileno;        /* File number. Useful to determine if it's stdin. */
 };
@@ -100,7 +100,7 @@ ufile_fill_uchar_buffer(UFILE *f);
  * Get one code unit and detect whether the end of file has been reached.
  * @param f The UFILE containing the characters.
  * @param ch The read in character
- * @return TRUE if the character is valid, or FALSE when EOF has been detected
+ * @return true if the character is valid, or false when EOF has been detected
  */
 U_CFUNC UBool U_EXPORT2
 ufile_getch(UFILE *f, UChar *ch);
@@ -109,7 +109,7 @@ ufile_getch(UFILE *f, UChar *ch);
  * Get one character and detect whether the end of file has been reached.
  * @param f The UFILE containing the characters.
  * @param ch The read in character
- * @return TRUE if the character is valid, or FALSE when EOF has been detected
+ * @return true if the character is valid, or false when EOF has been detected
  */
 U_CFUNC UBool U_EXPORT2
 ufile_getch32(UFILE *f, UChar32 *ch);
diff --git a/contrib/libs/icu/io/ufmt_cmn.cpp b/contrib/libs/icu/io/ufmt_cmn.cpp
index 0b6c18ff96..a475175f37 100644
--- a/contrib/libs/icu/io/ufmt_cmn.cpp
+++ b/contrib/libs/icu/io/ufmt_cmn.cpp
@@ -143,7 +143,7 @@ ufmt_uto64(const UChar     *buffer,
     uint64_t        result;
     
     
-    /* intialize parameters */
+    /* initialize parameters */
     limit     = buffer + *len;
     count     = 0;
     result    = 0;
@@ -175,7 +175,7 @@ ufmt_utop(const UChar     *buffer,
         uint8_t bytes[sizeof(void*)];
     } result;
     
-    /* intialize variables */
+    /* initialize variables */
     count      = 0;
     offset     = 0;
     result.ptr = NULL;
diff --git a/contrib/libs/icu/io/ufmt_cmn.h b/contrib/libs/icu/io/ufmt_cmn.h
index 33b56a0175..d040fdce5a 100644
--- a/contrib/libs/icu/io/ufmt_cmn.h
+++ b/contrib/libs/icu/io/ufmt_cmn.h
@@ -82,7 +82,7 @@ ufmt_digitvalue(UChar c);
  * Determine if a UChar is a digit for a specified radix.
  * @param c The UChar to check.
  * @param radix The desired radix.
- * @return TRUE if <TT>c</TT> is a digit in <TT>radix</TT>, FALSE otherwise.
+ * @return true if <TT>c</TT> is a digit in <TT>radix</TT>, false otherwise.
  */
 UBool
 ufmt_isdigit(UChar     c,
@@ -95,7 +95,7 @@ ufmt_isdigit(UChar     c,
  * the number of UChars written to <TT>buffer</TT>.
  * @param value The value to be converted
  * @param radix The desired radix
- * @param uselower TRUE means lower case will be used, FALSE means upper case
+ * @param uselower true means lower case will be used, false means upper case
  * @param minDigits The minimum number of digits for for the formatted number,
  * which will be padded with zeroes. -1 means do not pad.
  */
diff --git a/contrib/libs/icu/io/uscanf_p.cpp b/contrib/libs/icu/io/uscanf_p.cpp
index 6dc1c09dee..5bf3e5b7a8 100644
--- a/contrib/libs/icu/io/uscanf_p.cpp
+++ b/contrib/libs/icu/io/uscanf_p.cpp
@@ -695,9 +695,10 @@ u_scanf_integer_handler(UFILE       *input,
 
     int32_t         len;
     void            *num        = (void*) (args[0].ptrValue);
-    UNumberFormat   *format;
+    UNumberFormat   *format, *localFormat;
     int32_t         parsePos    = 0;
     int32_t         skipped;
+    int32_t         parseIntOnly = 0;
     UErrorCode      status      = U_ZERO_ERROR;
     int64_t         result;
 
@@ -722,11 +723,20 @@ u_scanf_integer_handler(UFILE       *input,
     if(format == 0)
         return 0;
 
+    /* for integer types, do not attempt to parse fractions */
+    localFormat = unum_clone(format, &status);
+    if(U_FAILURE(status))
+        return 0;
+
+    if(info->fSpec == 'd' || info->fSpec == 'i' || info->fSpec == 'u')
+        parseIntOnly = 1;
+    unum_setAttribute(localFormat, UNUM_PARSE_INT_ONLY, parseIntOnly);
+
     /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
-    skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
+    skipped += u_scanf_skip_leading_positive_sign(input, localFormat, &status);
 
     /* parse the number */
-    result = unum_parseInt64(format, input->str.fPos, len, &parsePos, &status);
+    result = unum_parseInt64(localFormat, input->str.fPos, len, &parsePos, &status);
 
     /* mask off any necessary bits */
     if (!info->fSkipArg) {
@@ -741,6 +751,9 @@ u_scanf_integer_handler(UFILE       *input,
     /* update the input's position to reflect consumed data */
     input->str.fPos += parsePos;
 
+    /* cleanup cloned formatter */
+    unum_close(localFormat);
+
     /* we converted 1 arg */
     *argConverted = !info->fSkipArg;
     return parsePos + skipped;
author	romankoshelev <romankoshelev@yandex-team.com>	2023-08-09 20:07:20 +0300
committer	romankoshelev <romankoshelev@yandex-team.com>	2023-08-09 20:59:13 +0300
commit	fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch)
tree	f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/io
parent	bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff)
download	ydb-fd82fb12fb45e71a02c628e45b12c50c0dd0d308.tar.gz