diff options
author | vvvv <vvvv@yandex-team.ru> | 2022-02-11 19:00:01 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.ru> | 2022-02-11 19:00:01 +0300 |
commit | 4f5398551111a2b05b55de391d5d296bd0a670bf (patch) | |
tree | 422aa536507b05260da88883c27d1c9742a292fd /contrib/libs/icu/common/ucptrie.cpp | |
parent | 0d99fc6efe15562b8474a702ab18e406ac102cdf (diff) | |
download | ydb-4f5398551111a2b05b55de391d5d296bd0a670bf.tar.gz |
YQL-13794 remove most of boost & ICU dependency
ref:eae1a1c30940ed737c886518182fd97bd872667d
Diffstat (limited to 'contrib/libs/icu/common/ucptrie.cpp')
-rw-r--r-- | contrib/libs/icu/common/ucptrie.cpp | 601 |
1 files changed, 0 insertions, 601 deletions
diff --git a/contrib/libs/icu/common/ucptrie.cpp b/contrib/libs/icu/common/ucptrie.cpp deleted file mode 100644 index 0004160a23..0000000000 --- a/contrib/libs/icu/common/ucptrie.cpp +++ /dev/null @@ -1,601 +0,0 @@ -// © 2017 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -// ucptrie.cpp (modified from utrie2.cpp) -// created: 2017dec29 Markus W. Scherer - -// #define UCPTRIE_DEBUG -#ifdef UCPTRIE_DEBUG -# include <stdio.h> -#endif - -#include "unicode/utypes.h" -#include "unicode/ucptrie.h" -#include "unicode/utf.h" -#include "unicode/utf8.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "uassert.h" -#include "ucptrie_impl.h" - -U_CAPI UCPTrie * U_EXPORT2 -ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth, - const void *data, int32_t length, int32_t *pActualLength, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return nullptr; - } - - if (length <= 0 || (U_POINTER_MASK_LSB(data, 3) != 0) || - type < UCPTRIE_TYPE_ANY || UCPTRIE_TYPE_SMALL < type || - valueWidth < UCPTRIE_VALUE_BITS_ANY || UCPTRIE_VALUE_BITS_8 < valueWidth) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return nullptr; - } - - // Enough data for a trie header? - if (length < (int32_t)sizeof(UCPTrieHeader)) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - // Check the signature. - const UCPTrieHeader *header = (const UCPTrieHeader *)data; - if (header->signature != UCPTRIE_SIG) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - int32_t options = header->options; - int32_t typeInt = (options >> 6) & 3; - int32_t valueWidthInt = options & UCPTRIE_OPTIONS_VALUE_BITS_MASK; - if (typeInt > UCPTRIE_TYPE_SMALL || valueWidthInt > UCPTRIE_VALUE_BITS_8 || - (options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - UCPTrieType actualType = (UCPTrieType)typeInt; - UCPTrieValueWidth actualValueWidth = (UCPTrieValueWidth)valueWidthInt; - if (type < 0) { - type = actualType; - } - if (valueWidth < 0) { - valueWidth = actualValueWidth; - } - if (type != actualType || valueWidth != actualValueWidth) { - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - // Get the length values and offsets. - UCPTrie tempTrie; - uprv_memset(&tempTrie, 0, sizeof(tempTrie)); - tempTrie.indexLength = header->indexLength; - tempTrie.dataLength = - ((options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | header->dataLength; - tempTrie.index3NullOffset = header->index3NullOffset; - tempTrie.dataNullOffset = - ((options & UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK) << 8) | header->dataNullOffset; - - tempTrie.highStart = header->shiftedHighStart << UCPTRIE_SHIFT_2; - tempTrie.shifted12HighStart = (tempTrie.highStart + 0xfff) >> 12; - tempTrie.type = type; - tempTrie.valueWidth = valueWidth; - - // Calculate the actual length. - int32_t actualLength = (int32_t)sizeof(UCPTrieHeader) + tempTrie.indexLength * 2; - if (valueWidth == UCPTRIE_VALUE_BITS_16) { - actualLength += tempTrie.dataLength * 2; - } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { - actualLength += tempTrie.dataLength * 4; - } else { - actualLength += tempTrie.dataLength; - } - if (length < actualLength) { - *pErrorCode = U_INVALID_FORMAT_ERROR; // Not enough bytes. - return nullptr; - } - - // Allocate the trie. - UCPTrie *trie = (UCPTrie *)uprv_malloc(sizeof(UCPTrie)); - if (trie == nullptr) { - *pErrorCode = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - uprv_memcpy(trie, &tempTrie, sizeof(tempTrie)); -#ifdef UCPTRIE_DEBUG - trie->name = "fromSerialized"; -#endif - - // Set the pointers to its index and data arrays. - const uint16_t *p16 = (const uint16_t *)(header + 1); - trie->index = p16; - p16 += trie->indexLength; - - // Get the data. - int32_t nullValueOffset = trie->dataNullOffset; - if (nullValueOffset >= trie->dataLength) { - nullValueOffset = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - } - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - trie->data.ptr16 = p16; - trie->nullValue = trie->data.ptr16[nullValueOffset]; - break; - case UCPTRIE_VALUE_BITS_32: - trie->data.ptr32 = (const uint32_t *)p16; - trie->nullValue = trie->data.ptr32[nullValueOffset]; - break; - case UCPTRIE_VALUE_BITS_8: - trie->data.ptr8 = (const uint8_t *)p16; - trie->nullValue = trie->data.ptr8[nullValueOffset]; - break; - default: - // Unreachable because valueWidth was checked above. - *pErrorCode = U_INVALID_FORMAT_ERROR; - return nullptr; - } - - if (pActualLength != nullptr) { - *pActualLength = actualLength; - } - return trie; -} - -U_CAPI void U_EXPORT2 -ucptrie_close(UCPTrie *trie) { - uprv_free(trie); -} - -U_CAPI UCPTrieType U_EXPORT2 -ucptrie_getType(const UCPTrie *trie) { - return (UCPTrieType)trie->type; -} - -U_CAPI UCPTrieValueWidth U_EXPORT2 -ucptrie_getValueWidth(const UCPTrie *trie) { - return (UCPTrieValueWidth)trie->valueWidth; -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c) { - int32_t i1 = c >> UCPTRIE_SHIFT_1; - if (trie->type == UCPTRIE_TYPE_FAST) { - U_ASSERT(0xffff < c && c < trie->highStart); - i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; - } else { - U_ASSERT((uint32_t)c < (uint32_t)trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); - i1 += UCPTRIE_SMALL_INDEX_LENGTH; - } - int32_t i3Block = trie->index[ - (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; - int32_t i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; - int32_t dataBlock; - if ((i3Block & 0x8000) == 0) { - // 16-bit indexes - dataBlock = trie->index[i3Block + i3]; - } else { - // 18-bit indexes stored in groups of 9 entries per 8 indexes. - i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); - i3 &= 7; - dataBlock = ((int32_t)trie->index[i3Block++] << (2 + (2 * i3))) & 0x30000; - dataBlock |= trie->index[i3Block + i3]; - } - return dataBlock + (c & UCPTRIE_SMALL_DATA_MASK); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3) { - UChar32 c = (lt1 << 12) | (t2 << 6) | t3; - if (c >= trie->highStart) { - // Possible because the UTF-8 macro compares with shifted12HighStart which may be higher. - return trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - } - return ucptrie_internalSmallIndex(trie, c); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c, - const uint8_t *start, const uint8_t *src) { - int32_t i, length; - // Support 64-bit pointers by avoiding cast of arbitrary difference. - if ((src - start) <= 7) { - i = length = (int32_t)(src - start); - } else { - i = length = 7; - start = src - 7; - } - c = utf8_prevCharSafeBody(start, 0, &i, c, -1); - i = length - i; // Number of bytes read backward from src. - int32_t idx = _UCPTRIE_CP_INDEX(trie, 0xffff, c); - return (idx << 3) | i; -} - -namespace { - -inline uint32_t getValue(UCPTrieData data, UCPTrieValueWidth valueWidth, int32_t dataIndex) { - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - return data.ptr16[dataIndex]; - case UCPTRIE_VALUE_BITS_32: - return data.ptr32[dataIndex]; - case UCPTRIE_VALUE_BITS_8: - return data.ptr8[dataIndex]; - default: - // Unreachable if the trie is properly initialized. - return 0xffffffff; - } -} - -} // namespace - -U_CAPI uint32_t U_EXPORT2 -ucptrie_get(const UCPTrie *trie, UChar32 c) { - int32_t dataIndex; - if ((uint32_t)c <= 0x7f) { - // linear ASCII - dataIndex = c; - } else { - UChar32 fastMax = trie->type == UCPTRIE_TYPE_FAST ? 0xffff : UCPTRIE_SMALL_MAX; - dataIndex = _UCPTRIE_CP_INDEX(trie, fastMax, c); - } - return getValue(trie->data, (UCPTrieValueWidth)trie->valueWidth, dataIndex); -} - -namespace { - -constexpr int32_t MAX_UNICODE = 0x10ffff; - -inline uint32_t maybeFilterValue(uint32_t value, uint32_t trieNullValue, uint32_t nullValue, - UCPMapValueFilter *filter, const void *context) { - if (value == trieNullValue) { - value = nullValue; - } else if (filter != nullptr) { - value = filter(context, value); - } - return value; -} - -UChar32 getRange(const void *t, UChar32 start, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - if ((uint32_t)start > MAX_UNICODE) { - return U_SENTINEL; - } - const UCPTrie *trie = reinterpret_cast<const UCPTrie *>(t); - UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; - if (start >= trie->highStart) { - if (pValue != nullptr) { - int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - uint32_t value = getValue(trie->data, valueWidth, di); - if (filter != nullptr) { value = filter(context, value); } - *pValue = value; - } - return MAX_UNICODE; - } - - uint32_t nullValue = trie->nullValue; - if (filter != nullptr) { nullValue = filter(context, nullValue); } - const uint16_t *index = trie->index; - - int32_t prevI3Block = -1; - int32_t prevBlock = -1; - UChar32 c = start; - uint32_t trieValue, value = nullValue; - bool haveValue = false; - do { - int32_t i3Block; - int32_t i3; - int32_t i3BlockLength; - int32_t dataBlockLength; - if (c <= 0xffff && (trie->type == UCPTRIE_TYPE_FAST || c <= UCPTRIE_SMALL_MAX)) { - i3Block = 0; - i3 = c >> UCPTRIE_FAST_SHIFT; - i3BlockLength = trie->type == UCPTRIE_TYPE_FAST ? - UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH; - dataBlockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH; - } else { - // Use the multi-stage index. - int32_t i1 = c >> UCPTRIE_SHIFT_1; - if (trie->type == UCPTRIE_TYPE_FAST) { - U_ASSERT(0xffff < c && c < trie->highStart); - i1 += UCPTRIE_BMP_INDEX_LENGTH - UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH; - } else { - U_ASSERT(c < trie->highStart && trie->highStart > UCPTRIE_SMALL_LIMIT); - i1 += UCPTRIE_SMALL_INDEX_LENGTH; - } - i3Block = trie->index[ - (int32_t)trie->index[i1] + ((c >> UCPTRIE_SHIFT_2) & UCPTRIE_INDEX_2_MASK)]; - if (i3Block == prevI3Block && (c - start) >= UCPTRIE_CP_PER_INDEX_2_ENTRY) { - // The index-3 block is the same as the previous one, and filled with value. - U_ASSERT((c & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0); - c += UCPTRIE_CP_PER_INDEX_2_ENTRY; - continue; - } - prevI3Block = i3Block; - if (i3Block == trie->index3NullOffset) { - // This is the index-3 null block. - if (haveValue) { - if (nullValue != value) { - return c - 1; - } - } else { - trieValue = trie->nullValue; - value = nullValue; - if (pValue != nullptr) { *pValue = nullValue; } - haveValue = true; - } - prevBlock = trie->dataNullOffset; - c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1); - continue; - } - i3 = (c >> UCPTRIE_SHIFT_3) & UCPTRIE_INDEX_3_MASK; - i3BlockLength = UCPTRIE_INDEX_3_BLOCK_LENGTH; - dataBlockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH; - } - // Enumerate data blocks for one index-3 block. - do { - int32_t block; - if ((i3Block & 0x8000) == 0) { - block = index[i3Block + i3]; - } else { - // 18-bit indexes stored in groups of 9 entries per 8 indexes. - int32_t group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); - int32_t gi = i3 & 7; - block = ((int32_t)index[group++] << (2 + (2 * gi))) & 0x30000; - block |= index[group + gi]; - } - if (block == prevBlock && (c - start) >= dataBlockLength) { - // The block is the same as the previous one, and filled with value. - U_ASSERT((c & (dataBlockLength - 1)) == 0); - c += dataBlockLength; - } else { - int32_t dataMask = dataBlockLength - 1; - prevBlock = block; - if (block == trie->dataNullOffset) { - // This is the data null block. - if (haveValue) { - if (nullValue != value) { - return c - 1; - } - } else { - trieValue = trie->nullValue; - value = nullValue; - if (pValue != nullptr) { *pValue = nullValue; } - haveValue = true; - } - c = (c + dataBlockLength) & ~dataMask; - } else { - int32_t di = block + (c & dataMask); - uint32_t trieValue2 = getValue(trie->data, valueWidth, di); - if (haveValue) { - if (trieValue2 != trieValue) { - if (filter == nullptr || - maybeFilterValue(trieValue2, trie->nullValue, nullValue, - filter, context) != value) { - return c - 1; - } - trieValue = trieValue2; // may or may not help - } - } else { - trieValue = trieValue2; - value = maybeFilterValue(trieValue2, trie->nullValue, nullValue, - filter, context); - if (pValue != nullptr) { *pValue = value; } - haveValue = true; - } - while ((++c & dataMask) != 0) { - trieValue2 = getValue(trie->data, valueWidth, ++di); - if (trieValue2 != trieValue) { - if (filter == nullptr || - maybeFilterValue(trieValue2, trie->nullValue, nullValue, - filter, context) != value) { - return c - 1; - } - trieValue = trieValue2; // may or may not help - } - } - } - } - } while (++i3 < i3BlockLength); - } while (c < trie->highStart); - U_ASSERT(haveValue); - int32_t di = trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET; - uint32_t highValue = getValue(trie->data, valueWidth, di); - if (maybeFilterValue(highValue, trie->nullValue, nullValue, - filter, context) != value) { - return c - 1; - } else { - return MAX_UNICODE; - } -} - -} // namespace - -U_CFUNC UChar32 -ucptrie_internalGetRange(UCPTrieGetRange *getRange, - const void *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - if (option == UCPMAP_RANGE_NORMAL) { - return getRange(trie, start, filter, context, pValue); - } - uint32_t value; - if (pValue == nullptr) { - // We need to examine the range value even if the caller does not want it. - pValue = &value; - } - UChar32 surrEnd = option == UCPMAP_RANGE_FIXED_ALL_SURROGATES ? 0xdfff : 0xdbff; - UChar32 end = getRange(trie, start, filter, context, pValue); - if (end < 0xd7ff || start > surrEnd) { - return end; - } - // The range overlaps with surrogates, or ends just before the first one. - if (*pValue == surrogateValue) { - if (end >= surrEnd) { - // Surrogates followed by a non-surrogateValue range, - // or surrogates are part of a larger surrogateValue range. - return end; - } - } else { - if (start <= 0xd7ff) { - return 0xd7ff; // Non-surrogateValue range ends before surrogateValue surrogates. - } - // Start is a surrogate with a non-surrogateValue code *unit* value. - // Return a surrogateValue code *point* range. - *pValue = surrogateValue; - if (end > surrEnd) { - return surrEnd; // Surrogate range ends before non-surrogateValue rest of range. - } - } - // See if the surrogateValue surrogate range can be merged with - // an immediately following range. - uint32_t value2; - UChar32 end2 = getRange(trie, surrEnd + 1, filter, context, &value2); - if (value2 == surrogateValue) { - return end2; - } - return surrEnd; -} - -U_CAPI UChar32 U_EXPORT2 -ucptrie_getRange(const UCPTrie *trie, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - return ucptrie_internalGetRange(getRange, trie, start, - option, surrogateValue, - filter, context, pValue); -} - -U_CAPI int32_t U_EXPORT2 -ucptrie_toBinary(const UCPTrie *trie, - void *data, int32_t capacity, - UErrorCode *pErrorCode) { - if (U_FAILURE(*pErrorCode)) { - return 0; - } - - UCPTrieType type = (UCPTrieType)trie->type; - UCPTrieValueWidth valueWidth = (UCPTrieValueWidth)trie->valueWidth; - if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type || - valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth || - capacity < 0 || - (capacity > 0 && (data == nullptr || (U_POINTER_MASK_LSB(data, 3) != 0)))) { - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - int32_t length = (int32_t)sizeof(UCPTrieHeader) + trie->indexLength * 2; - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - length += trie->dataLength * 2; - break; - case UCPTRIE_VALUE_BITS_32: - length += trie->dataLength * 4; - break; - case UCPTRIE_VALUE_BITS_8: - length += trie->dataLength; - break; - default: - // unreachable - break; - } - if (capacity < length) { - *pErrorCode = U_BUFFER_OVERFLOW_ERROR; - return length; - } - - char *bytes = (char *)data; - UCPTrieHeader *header = (UCPTrieHeader *)bytes; - header->signature = UCPTRIE_SIG; // "Tri3" - header->options = (uint16_t)( - ((trie->dataLength & 0xf0000) >> 4) | - ((trie->dataNullOffset & 0xf0000) >> 8) | - (trie->type << 6) | - valueWidth); - header->indexLength = (uint16_t)trie->indexLength; - header->dataLength = (uint16_t)trie->dataLength; - header->index3NullOffset = trie->index3NullOffset; - header->dataNullOffset = (uint16_t)trie->dataNullOffset; - header->shiftedHighStart = trie->highStart >> UCPTRIE_SHIFT_2; - bytes += sizeof(UCPTrieHeader); - - uprv_memcpy(bytes, trie->index, trie->indexLength * 2); - bytes += trie->indexLength * 2; - - switch (valueWidth) { - case UCPTRIE_VALUE_BITS_16: - uprv_memcpy(bytes, trie->data.ptr16, trie->dataLength * 2); - break; - case UCPTRIE_VALUE_BITS_32: - uprv_memcpy(bytes, trie->data.ptr32, trie->dataLength * 4); - break; - case UCPTRIE_VALUE_BITS_8: - uprv_memcpy(bytes, trie->data.ptr8, trie->dataLength); - break; - default: - // unreachable - break; - } - return length; -} - -namespace { - -#ifdef UCPTRIE_DEBUG -long countNull(const UCPTrie *trie) { - uint32_t nullValue=trie->nullValue; - int32_t length=trie->dataLength; - long count=0; - switch (trie->valueWidth) { - case UCPTRIE_VALUE_BITS_16: - for(int32_t i=0; i<length; ++i) { - if(trie->data.ptr16[i]==nullValue) { ++count; } - } - break; - case UCPTRIE_VALUE_BITS_32: - for(int32_t i=0; i<length; ++i) { - if(trie->data.ptr32[i]==nullValue) { ++count; } - } - break; - case UCPTRIE_VALUE_BITS_8: - for(int32_t i=0; i<length; ++i) { - if(trie->data.ptr8[i]==nullValue) { ++count; } - } - break; - default: - // unreachable - break; - } - return count; -} - -U_CFUNC void -ucptrie_printLengths(const UCPTrie *trie, const char *which) { - long indexLength=trie->indexLength; - long dataLength=(long)trie->dataLength; - long totalLength=(long)sizeof(UCPTrieHeader)+indexLength*2+ - dataLength*(trie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 2 : - trie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 4 : 1); - printf("**UCPTrieLengths(%s %s)** index:%6ld data:%6ld countNull:%6ld serialized:%6ld\n", - which, trie->name, indexLength, dataLength, countNull(trie), totalLength); -} -#endif - -} // namespace - -// UCPMap ---- -// Initially, this is the same as UCPTrie. This may well change. - -U_CAPI uint32_t U_EXPORT2 -ucpmap_get(const UCPMap *map, UChar32 c) { - return ucptrie_get(reinterpret_cast<const UCPTrie *>(map), c); -} - -U_CAPI UChar32 U_EXPORT2 -ucpmap_getRange(const UCPMap *map, UChar32 start, - UCPMapRangeOption option, uint32_t surrogateValue, - UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { - return ucptrie_getRange(reinterpret_cast<const UCPTrie *>(map), start, - option, surrogateValue, - filter, context, pValue); -} |