diff options
author | romankoshelev <[email protected]> | 2023-08-09 20:07:20 +0300 |
---|---|---|
committer | romankoshelev <[email protected]> | 2023-08-09 20:59:13 +0300 |
commit | fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch) | |
tree | f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/common/uloc.cpp | |
parent | bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff) |
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/common/uloc.cpp')
-rw-r--r-- | contrib/libs/icu/common/uloc.cpp | 433 |
1 files changed, 211 insertions, 222 deletions
diff --git a/contrib/libs/icu/common/uloc.cpp b/contrib/libs/icu/common/uloc.cpp index 0e235d7958c..c8a3f1ff731 100644 --- a/contrib/libs/icu/common/uloc.cpp +++ b/contrib/libs/icu/common/uloc.cpp @@ -50,9 +50,6 @@ #include "uassert.h" #include "charstr.h" -#include <algorithm> -#include <stdio.h> /* for sprintf */ - U_NAMESPACE_USE /* ### Declarations **************************************************/ @@ -60,12 +57,6 @@ U_NAMESPACE_USE /* Locale stuff from locid.cpp */ U_CFUNC void locale_set_default(const char *id); U_CFUNC const char *locale_get_default(void); -U_CFUNC int32_t -locale_getKeywords(const char *localeID, - char prev, - char *keywords, int32_t keywordCapacity, - UBool valuesToo, - UErrorCode *status); /* ### Data tables **************************************************/ @@ -152,7 +143,7 @@ static const char * const LANGUAGES[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg", "mga", "mgh", "mgo", "mh", "mi", "mic", "min", "mis", "mk", - "ml", "mn", "mnc", "mni", "mo", + "ml", "mn", "mnc", "mni", "moh", "mos", "mr", "mrj", "ms", "mt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "my", "mye", "myv", "mzn", @@ -175,9 +166,9 @@ static const char * const LANGUAGES[] = { "sl", "sli", "sly", "sm", "sma", "smj", "smn", "sms", "sn", "snk", "so", "sog", "sq", "sr", "srn", "srr", "ss", "ssy", "st", "stq", "su", "suk", "sus", "sux", - "sv", "sw", "swb", "swc", "syc", "syr", "szl", + "sv", "sw", "swb", "syc", "syr", "szl", "ta", "tcy", "te", "tem", "teo", "ter", "tet", "tg", - "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tl", + "th", "ti", "tig", "tiv", "tk", "tkl", "tkr", "tlh", "tli", "tly", "tmh", "tn", "to", "tog", "tpi", "tr", "tru", "trv", "ts", "tsd", "tsi", "tt", "ttt", "tum", "tvl", "tw", "twq", "ty", "tyv", "tzm", @@ -190,7 +181,7 @@ static const char * const LANGUAGES[] = { "za", "zap", "zbl", "zea", "zen", "zgh", "zh", "zu", "zun", "zxx", "zza", NULL, - "in", "iw", "ji", "jw", "sh", /* obsolete language codes */ + "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", /* obsolete language codes */ NULL }; @@ -269,7 +260,7 @@ static const char * const LANGUAGES_3[] = { "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde", "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga", "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd", - "mal", "mon", "mnc", "mni", "mol", + "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj", "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv", "mya", "mye", "myv", "mzn", @@ -292,9 +283,9 @@ static const char * const LANGUAGES_3[] = { "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms", "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr", "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux", - "swe", "swa", "swb", "swc", "syc", "syr", "szl", + "swe", "swa", "swb", "syc", "syr", "szl", "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk", - "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl", + "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi", "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt", "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm", @@ -307,8 +298,8 @@ static const char * const LANGUAGES_3[] = { "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul", "zun", "zxx", "zza", NULL, -/* "in", "iw", "ji", "jw", "sh", */ - "ind", "heb", "yid", "jaw", "srp", +/* "in", "iw", "ji", "jw", "mo", "sh", "swc", "tl", */ + "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl", NULL }; @@ -343,13 +334,13 @@ static const char * const COUNTRIES[] = { "BJ", "BL", "BM", "BN", "BO", "BQ", "BR", "BS", "BT", "BV", "BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", - "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", - "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", + "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", + "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", - "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", + "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS", "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", @@ -366,7 +357,7 @@ static const char * const COUNTRIES[] = { "TK", "TL", "TM", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", - "WS", "YE", "YT", "ZA", "ZM", "ZW", + "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", NULL, "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR", /* obsolete country codes */ NULL @@ -406,10 +397,10 @@ static const char * const COUNTRIES_3[] = { "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG", /* "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", */ "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI", -/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DJ", "DK", */ - "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK", -/* "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", */ - "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI", +/* "CU", "CV", "CW", "CX", "CY", "CZ", "DE", "DG", "DJ", "DK", */ + "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK", +/* "DM", "DO", "DZ", "EA", "EC", "EE", "EG", "EH", "ER", */ + "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI", /* "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", */ "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA", /* "GA", "GB", "GD", "GE", "GF", "GG", "GH", "GI", "GL", */ @@ -418,8 +409,8 @@ static const char * const COUNTRIES_3[] = { "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM", /* "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", */ "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN", -/* "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ - "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", +/* "IC", "ID", "IE", "IL", "IM", "IN", "IO", "IQ", "IR", "IS" */ + "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL", /* "IT", "JE", "JM", "JO", "JP", "KE", "KG", "KH", "KI", */ "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR", /* "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", */ @@ -452,8 +443,8 @@ static const char * const COUNTRIES_3[] = { "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB", /* "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", */ "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF", -/* "WS", "YE", "YT", "ZA", "ZM", "ZW", */ - "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE", +/* "WS", "XK", "YE", "YT", "ZA", "ZM", "ZW", */ + "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE", NULL, /* "AN", "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR" */ "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR", @@ -487,15 +478,24 @@ static const CanonicalizationMap CANONICALIZE_MAP[] = { /* Test if the locale id has BCP47 u extension and does not have '@' */ #define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1) /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */ -#define _ConvertBCP47(finalID, id, buffer, length,err) UPRV_BLOCK_MACRO_BEGIN { \ - if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 || \ - U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \ - finalID=id; \ - if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \ - } else { \ - finalID=buffer; \ - } \ -} UPRV_BLOCK_MACRO_END +static const char* _ConvertBCP47( + const char* id, char* buffer, int32_t length, + UErrorCode* err, int32_t* pLocaleIdSize) { + const char* finalID; + int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err); + if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { + finalID=id; + if (*err == U_STRING_NOT_TERMINATED_WARNING) { + *err = U_BUFFER_OVERFLOW_ERROR; + } + } else { + finalID=buffer; + } + if (pLocaleIdSize != nullptr) { + *pLocaleIdSize = localeIDSize; + } + return finalID; +} /* Gets the size of the shortest subtag in the given localeID. */ static int32_t getShortestSubtagLength(const char *localeID) { int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID)); @@ -601,12 +601,12 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri return uprv_strcmp(leftString, rightString); } -static void -_getKeywords(const char *localeID, - char prev, - ByteSink& sink, - UBool valuesToo, - UErrorCode *status) +U_CFUNC void +ulocimp_getKeywords(const char *localeID, + char prev, + ByteSink& sink, + UBool valuesToo, + UErrorCode *status) { KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS]; @@ -722,18 +722,18 @@ _getKeywords(const char *localeID, } } -U_CFUNC int32_t -locale_getKeywords(const char *localeID, - char prev, - char *keywords, int32_t keywordCapacity, - UBool valuesToo, - UErrorCode *status) { +U_CAPI int32_t U_EXPORT2 +uloc_getKeywordValue(const char* localeID, + const char* keywordName, + char* buffer, int32_t bufferCapacity, + UErrorCode* status) +{ if (U_FAILURE(*status)) { return 0; } - CheckedArrayByteSink sink(keywords, keywordCapacity); - _getKeywords(localeID, prev, sink, valuesToo, status); + CheckedArrayByteSink sink(buffer, bufferCapacity); + ulocimp_getKeywordValue(localeID, keywordName, sink, status); int32_t reslen = sink.NumberOfBytesAppended(); @@ -744,26 +744,22 @@ locale_getKeywords(const char *localeID, if (sink.Overflowed()) { *status = U_BUFFER_OVERFLOW_ERROR; } else { - u_terminateChars(keywords, keywordCapacity, reslen, status); + u_terminateChars(buffer, bufferCapacity, reslen, status); } return reslen; } -U_CAPI int32_t U_EXPORT2 -uloc_getKeywordValue(const char* localeID, - const char* keywordName, - char* buffer, int32_t bufferCapacity, - UErrorCode* status) +U_CAPI void U_EXPORT2 +ulocimp_getKeywordValue(const char* localeID, + const char* keywordName, + icu::ByteSink& sink, + UErrorCode* status) { - if (buffer != nullptr) { - buffer[0] = '\0'; - } const char* startSearchHere = NULL; const char* nextSeparator = NULL; char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN]; - int32_t result = 0; if(status && U_SUCCESS(*status) && localeID) { char tempBuffer[ULOC_FULLNAME_CAPACITY]; @@ -771,16 +767,17 @@ uloc_getKeywordValue(const char* localeID, if (keywordName == NULL || keywordName[0] == 0) { *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; + return; } locale_canonKeywordName(keywordNameBuffer, keywordName, status); if(U_FAILURE(*status)) { - return 0; + return; } if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); + tmpLocaleID = _ConvertBCP47(localeID, tempBuffer, + sizeof(tempBuffer), status, nullptr); } else { tmpLocaleID=localeID; } @@ -788,7 +785,7 @@ uloc_getKeywordValue(const char* localeID, startSearchHere = locale_getKeywordsStart(tmpLocaleID); if(startSearchHere == NULL) { /* no keywords, return at once */ - return 0; + return; } /* find the first keyword */ @@ -800,7 +797,7 @@ uloc_getKeywordValue(const char* localeID, nextSeparator = uprv_strchr(startSearchHere, '='); if(!nextSeparator) { *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */ - return 0; + return; } /* strip leading & trailing spaces (TC decided to tolerate these) */ while(*startSearchHere == ' ') { @@ -814,20 +811,20 @@ uloc_getKeywordValue(const char* localeID, /* copy & normalize keyName from locale */ if (startSearchHere == keyValueTail) { *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */ - return 0; + return; } keyValueLen = 0; while (startSearchHere < keyValueTail) { if (!UPRV_ISALPHANUM(*startSearchHere)) { *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ - return 0; + return; } if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) { localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++); } else { /* keyword name too long for internal buffer */ *status = U_INTERNAL_PROGRAM_ERROR; - return 0; + return; } } localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */ @@ -848,28 +845,20 @@ uloc_getKeywordValue(const char* localeID, /* Now copy the value, but check well-formedness */ if (nextSeparator == keyValueTail) { *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */ - return 0; + return; } - keyValueLen = 0; while (nextSeparator < keyValueTail) { if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) { *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */ - return 0; - } - if (keyValueLen < bufferCapacity) { - /* Should we lowercase value to return here? Tests expect as-is. */ - buffer[keyValueLen++] = *nextSeparator++; - } else { /* keep advancing so we return correct length in case of overflow */ - keyValueLen++; - nextSeparator++; + return; } + /* Should we lowercase value to return here? Tests expect as-is. */ + sink.Append(nextSeparator++, 1); } - result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status); - return result; + return; } } } - return 0; } U_CAPI int32_t U_EXPORT2 @@ -892,13 +881,15 @@ uloc_setKeywordValue(const char* keywordName, char* startSearchHere = NULL; char* keywordStart = NULL; CharString updatedKeysAndValues; - int32_t updatedKeysAndValuesLen; UBool handledInputKeyAndValue = FALSE; char keyValuePrefix = '@'; if(U_FAILURE(*status)) { return -1; } + if (*status == U_STRING_NOT_TERMINATED_WARNING) { + *status = U_ZERO_ERROR; + } if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) { *status = U_ILLEGAL_ARGUMENT_ERROR; return 0; @@ -936,6 +927,7 @@ uloc_setKeywordValue(const char* keywordName, startSearchHere = (char*)locale_getKeywordsStart(buffer); if(startSearchHere == NULL || (startSearchHere[1]==0)) { if(keywordValueLen == 0) { /* no keywords = nothing to remove */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } @@ -955,6 +947,7 @@ uloc_setKeywordValue(const char* keywordName, startSearchHere += keywordNameLen; *startSearchHere++ = '='; uprv_strcpy(startSearchHere, keywordValueBuffer); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return needLen; } /* end shortcut - no @ */ @@ -1069,20 +1062,26 @@ uloc_setKeywordValue(const char* keywordName, if (!handledInputKeyAndValue || U_FAILURE(*status)) { /* if input key/value specified removal of a keyword not present in locale, or * there was an error in CharString.append, leave original locale alone. */ + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return bufLen; } - updatedKeysAndValuesLen = updatedKeysAndValues.length(); - /* needLen = length of the part before '@' + length of updated key-value part including '@' */ - needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen; - if(needLen >= bufferCapacity) { + // needLen = length of the part before '@' + needLen = (int32_t)(startSearchHere - buffer); + // Check to see can we fit the startSearchHere, if not, return + // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it. + // We do this because this API function does not behave like most others: + // It promises never to set a U_STRING_NOT_TERMINATED_WARNING. + // When the contents fits but without the terminating NUL, in this case we need to not change + // the buffer contents and return with a buffer overflow error. + int32_t appendLength = updatedKeysAndValues.length(); + if (appendLength >= bufferCapacity - needLen) { *status = U_BUFFER_OVERFLOW_ERROR; - return needLen; /* no change */ - } - if (updatedKeysAndValuesLen > 0) { - uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen); + return needLen + appendLength; } - buffer[needLen]=0; + needLen += updatedKeysAndValues.extract( + startSearchHere, bufferCapacity - needLen, *status); + U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING); return needLen; } @@ -1148,7 +1147,7 @@ uloc_getCurrentLanguageID(const char* oldID){ * * TODO try to use this in Locale */ -static CharString +CharString U_EXPORT2 ulocimp_getLanguage(const char *localeID, const char **pEnd, UErrorCode &status) { @@ -1193,21 +1192,7 @@ ulocimp_getLanguage(const char *localeID, return result; } -U_CFUNC int32_t -ulocimp_getLanguage(const char *localeID, - char *language, int32_t languageCapacity, - const char **pEnd) { - ErrorCode status; - CharString result = ulocimp_getLanguage(localeID, pEnd, status); - if (status.isFailure()) { - return 0; - } - int32_t reslen = result.length(); - uprv_memcpy(language, result.data(), std::min(reslen, languageCapacity)); - return reslen; -} - -static CharString +CharString U_EXPORT2 ulocimp_getScript(const char *localeID, const char **pEnd, UErrorCode &status) { @@ -1241,21 +1226,7 @@ ulocimp_getScript(const char *localeID, return result; } -U_CFUNC int32_t -ulocimp_getScript(const char *localeID, - char *script, int32_t scriptCapacity, - const char **pEnd) { - ErrorCode status; - CharString result = ulocimp_getScript(localeID, pEnd, status); - if (status.isFailure()) { - return 0; - } - int32_t reslen = result.length(); - uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity)); - return reslen; -} - -static CharString +CharString U_EXPORT2 ulocimp_getCountry(const char *localeID, const char **pEnd, UErrorCode &status) { @@ -1290,29 +1261,15 @@ ulocimp_getCountry(const char *localeID, return result; } -U_CFUNC int32_t -ulocimp_getCountry(const char *localeID, - char *country, int32_t countryCapacity, - const char **pEnd) { - ErrorCode status; - CharString result = ulocimp_getCountry(localeID, pEnd, status); - if (status.isFailure()) { - return 0; - } - int32_t reslen = result.length(); - uprv_memcpy(country, result.data(), std::min(reslen, countryCapacity)); - return reslen; -} - /** * @param needSeparator if true, then add leading '_' if any variants * are added to 'variant' */ static void -_getVariantEx(const char *localeID, - char prev, - ByteSink& sink, - UBool needSeparator) { +_getVariant(const char *localeID, + char prev, + ByteSink& sink, + UBool needSeparator) { UBool hasVariant = FALSE; /* get one or more variant tags and separate them with '_' */ @@ -1353,23 +1310,6 @@ _getVariantEx(const char *localeID, } } -static int32_t -_getVariantEx(const char *localeID, - char prev, - char *variant, int32_t variantCapacity, - UBool needSeparator) { - CheckedArrayByteSink sink(variant, variantCapacity); - _getVariantEx(localeID, prev, sink, needSeparator); - return sink.NumberOfBytesAppended(); -} - -static int32_t -_getVariant(const char *localeID, - char prev, - char *variant, int32_t variantCapacity) { - return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE); -} - /* Keyword enumeration */ typedef struct UKeywordsContext { @@ -1466,9 +1406,6 @@ U_CAPI UEnumeration* U_EXPORT2 uloc_openKeywords(const char* localeID, UErrorCode* status) { - int32_t i=0; - char keywords[256]; - int32_t keywordsCapacity = 256; char tempBuffer[ULOC_FULLNAME_CAPACITY]; const char* tmpLocaleID; @@ -1477,43 +1414,52 @@ uloc_openKeywords(const char* localeID, } if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status); + tmpLocaleID = _ConvertBCP47(localeID, tempBuffer, + sizeof(tempBuffer), status, nullptr); } else { if (localeID==NULL) { - localeID=uloc_getDefault(); + localeID=uloc_getDefault(); } tmpLocaleID=localeID; } /* Skip the language */ - ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); + ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status); + if (U_FAILURE(*status)) { + return 0; + } + if(_isIDSeparator(*tmpLocaleID)) { const char *scriptID; /* Skip the script if available */ - ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); + ulocimp_getScript(tmpLocaleID+1, &scriptID, *status); + if (U_FAILURE(*status)) { + return 0; + } if(scriptID != tmpLocaleID+1) { /* Found optional script */ tmpLocaleID = scriptID; } /* Skip the Country */ if (_isIDSeparator(*tmpLocaleID)) { - ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID); - if(_isIDSeparator(*tmpLocaleID)) { - _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0); + ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status); + if (U_FAILURE(*status)) { + return 0; } } } /* keywords are located after '@' */ if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { - i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, FALSE, status); - } - - if(i) { - return uloc_openKeywordList(keywords, i, status); - } else { - return NULL; + CharString keywords; + CharStringByteSink sink(&keywords); + ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status); + if (U_FAILURE(*status)) { + return NULL; + } + return uloc_openKeywordList(keywords.data(), keywords.length(), status); } + return NULL; } @@ -1538,19 +1484,41 @@ _canonicalize(const char* localeID, ByteSink& sink, uint32_t options, UErrorCode* err) { + if (U_FAILURE(*err)) { + return; + } + int32_t j, fieldCount=0, scriptSize=0, variantSize=0; - char tempBuffer[ULOC_FULLNAME_CAPACITY]; + PreflightingLocaleIDBuffer tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this + CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this const char* origLocaleID; const char* tmpLocaleID; const char* keywordAssign = NULL; const char* separatorIndicator = NULL; - if (U_FAILURE(*err)) { - return; - } - if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); + const char* localeIDPtr = localeID; + + // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string + if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') { + localeIDWithHyphens.append(localeID, -1, *err); + if (U_SUCCESS(*err)) { + for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) { + if (*p == '_') { + *p = '-'; + } + } + localeIDPtr = localeIDWithHyphens.data(); + } + } + + do { + // After this call tmpLocaleID may point to localeIDPtr which may + // point to either localeID or localeIDWithHyphens.data(). + tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(), + tempBuffer.getCapacity(), err, + &(tempBuffer.requestedCapacity)); + } while (tempBuffer.needToTryAgain(err)); } else { if (localeID==NULL) { localeID=uloc_getDefault(); @@ -1605,7 +1573,7 @@ _canonicalize(const char* localeID, variantSize = -tag.length(); { CharStringByteSink s(&tag); - _getVariantEx(tmpLocaleID+1, *tmpLocaleID, s, FALSE); + _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE); } variantSize += tag.length(); if (variantSize > 0) { @@ -1667,7 +1635,7 @@ _canonicalize(const char* localeID, int32_t posixVariantSize = -tag.length(); { CharStringByteSink s(&tag); - _getVariantEx(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0)); + _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0)); } posixVariantSize += tag.length(); if (posixVariantSize > 0) { @@ -1696,7 +1664,7 @@ _canonicalize(const char* localeID, (!separatorIndicator || separatorIndicator > keywordAssign)) { sink.Append("@", 1); ++fieldCount; - _getKeywords(tmpLocaleID+1, '@', sink, TRUE, err); + ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err); } } } @@ -1745,7 +1713,6 @@ uloc_getLanguage(const char* localeID, UErrorCode* err) { /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/ - int32_t i=0; if (err==NULL || U_FAILURE(*err)) { return 0; @@ -1755,8 +1722,7 @@ uloc_getLanguage(const char* localeID, localeID=uloc_getDefault(); } - i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL); - return u_terminateChars(language, languageCapacity, i, err); + return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err); } U_CAPI int32_t U_EXPORT2 @@ -1765,8 +1731,6 @@ uloc_getScript(const char* localeID, int32_t scriptCapacity, UErrorCode* err) { - int32_t i=0; - if(err==NULL || U_FAILURE(*err)) { return 0; } @@ -1776,11 +1740,15 @@ uloc_getScript(const char* localeID, } /* skip the language */ - ulocimp_getLanguage(localeID, NULL, 0, &localeID); + ulocimp_getLanguage(localeID, &localeID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if(_isIDSeparator(*localeID)) { - i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL); + return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err); } - return u_terminateChars(script, scriptCapacity, i, err); + return u_terminateChars(script, scriptCapacity, 0, err); } U_CAPI int32_t U_EXPORT2 @@ -1789,8 +1757,6 @@ uloc_getCountry(const char* localeID, int32_t countryCapacity, UErrorCode* err) { - int32_t i=0; - if(err==NULL || U_FAILURE(*err)) { return 0; } @@ -1800,20 +1766,27 @@ uloc_getCountry(const char* localeID, } /* Skip the language */ - ulocimp_getLanguage(localeID, NULL, 0, &localeID); + ulocimp_getLanguage(localeID, &localeID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if(_isIDSeparator(*localeID)) { const char *scriptID; /* Skip the script if available */ - ulocimp_getScript(localeID+1, NULL, 0, &scriptID); + ulocimp_getScript(localeID+1, &scriptID, *err); + if (U_FAILURE(*err)) { + return 0; + } if(scriptID != localeID+1) { /* Found optional script */ localeID = scriptID; } if(_isIDSeparator(*localeID)) { - i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL); + return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err); } } - return u_terminateChars(country, countryCapacity, i, err); + return u_terminateChars(country, countryCapacity, 0, err); } U_CAPI int32_t U_EXPORT2 @@ -1831,7 +1804,7 @@ uloc_getVariant(const char* localeID, } if (_hasBCP47Extension(localeID)) { - _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err); + tmpLocaleID =_ConvertBCP47(localeID, tempBuffer, sizeof(tempBuffer), err, nullptr); } else { if (localeID==NULL) { localeID=uloc_getDefault(); @@ -1840,11 +1813,18 @@ uloc_getVariant(const char* localeID, } /* Skip the language */ - ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID); + ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err); + if (U_FAILURE(*err)) { + return 0; + } + if(_isIDSeparator(*tmpLocaleID)) { const char *scriptID; /* Skip the script if available */ - ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID); + ulocimp_getScript(tmpLocaleID+1, &scriptID, *err); + if (U_FAILURE(*err)) { + return 0; + } if(scriptID != tmpLocaleID+1) { /* Found optional script */ tmpLocaleID = scriptID; @@ -1852,7 +1832,10 @@ uloc_getVariant(const char* localeID, /* Skip the Country */ if (_isIDSeparator(*tmpLocaleID)) { const char *cntryID; - ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID); + ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err); + if (U_FAILURE(*err)) { + return 0; + } if (cntryID != tmpLocaleID+1) { /* Found optional country */ tmpLocaleID = cntryID; @@ -1862,18 +1845,24 @@ uloc_getVariant(const char* localeID, if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { tmpLocaleID++; } - i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity); + + CheckedArrayByteSink sink(variant, variantCapacity); + _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE); + + i = sink.NumberOfBytesAppended(); + + if (U_FAILURE(*err)) { + return i; + } + + if (sink.Overflowed()) { + *err = U_BUFFER_OVERFLOW_ERROR; + return i; + } } } } - /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */ - /* if we do not have a variant tag yet then try a POSIX variant after '@' */ -/* - if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) { - i=_getVariant(localeID+1, '@', variant, variantCapacity); - } -*/ return u_terminateChars(variant, variantCapacity, i, err); } @@ -1905,7 +1894,7 @@ uloc_getName(const char* localeID, return reslen; } -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 ulocimp_getName(const char* localeID, ByteSink& sink, UErrorCode* err) @@ -1941,7 +1930,7 @@ uloc_getBaseName(const char* localeID, return reslen; } -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 ulocimp_getBaseName(const char* localeID, ByteSink& sink, UErrorCode* err) @@ -1977,7 +1966,7 @@ uloc_canonicalize(const char* localeID, return reslen; } -U_STABLE void U_EXPORT2 +U_CAPI void U_EXPORT2 ulocimp_canonicalize(const char* localeID, ByteSink& sink, UErrorCode* err) @@ -2058,22 +2047,22 @@ uloc_getLCID(const char* localeID) // uprv_convertToLCID does not support keywords other than collation. // Remove all keywords except collation. int32_t len; - char collVal[ULOC_KEYWORDS_CAPACITY]; char tmpLocaleID[ULOC_FULLNAME_CAPACITY]; - len = uloc_getKeywordValue(localeID, "collation", collVal, - UPRV_LENGTHOF(collVal) - 1, &status); - - if (U_SUCCESS(status) && len > 0) { - collVal[len] = 0; + CharString collVal; + { + CharStringByteSink sink(&collVal); + ulocimp_getKeywordValue(localeID, "collation", sink, &status); + } + if (U_SUCCESS(status) && !collVal.isEmpty()) { len = uloc_getBaseName(localeID, tmpLocaleID, UPRV_LENGTHOF(tmpLocaleID) - 1, &status); if (U_SUCCESS(status) && len > 0) { tmpLocaleID[len] = 0; - len = uloc_setKeywordValue("collation", collVal, tmpLocaleID, + len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID, UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status); if (U_SUCCESS(status) && len > 0) { |