about summary refs log tree commit diff stats
path: root/contrib/libs/icu/i18n/collationdata.cpp
diff options
context:
space:
mode:
author neksard <neksard@yandex-team.ru> 2022-02-10 16:45:33 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:33 +0300
commit 1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/i18n/collationdata.cpp
parent 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/collationdata.cpp')
-rw-r--r-- contrib/libs/icu/i18n/collationdata.cpp 778
1 files changed, 389 insertions, 389 deletions
diff --git a/contrib/libs/icu/i18n/collationdata.cpp b/contrib/libs/icu/i18n/collationdata.cpp
index a394e2e102..688770f8f6 100644
--- a/contrib/libs/icu/i18n/collationdata.cpp
+++ b/contrib/libs/icu/i18n/collationdata.cpp
@@ -1,390 +1,390 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 2012-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-*******************************************************************************
-* collationdata.cpp
-*
-* created on: 2012jul28
-* created by: Markus W. Scherer
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
-#include "unicode/ucol.h"
-#include "unicode/udata.h"
-#include "unicode/uscript.h"
-#include "cmemory.h"
-#include "collation.h"
-#include "collationdata.h"
-#include "uassert.h"
-#include "utrie2.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-uint32_t
-CollationData::getIndirectCE32(uint32_t ce32) const {
- U_ASSERT(Collation::isSpecialCE32(ce32));
- int32_t tag = Collation::tagFromCE32(ce32);
- if(tag == Collation::DIGIT_TAG) {
- // Fetch the non-numeric-collation CE32.
- ce32 = ce32s[Collation::indexFromCE32(ce32)];
- } else if(tag == Collation::LEAD_SURROGATE_TAG) {
- ce32 = Collation::UNASSIGNED_CE32;
- } else if(tag == Collation::U0000_TAG) {
- // Fetch the normal ce32 for U+0000.
- ce32 = ce32s[0];
- }
- return ce32;
-}
-
-uint32_t
-CollationData::getFinalCE32(uint32_t ce32) const {
- if(Collation::isSpecialCE32(ce32)) {
- ce32 = getIndirectCE32(ce32);
- }
- return ce32;
-}
-
-int64_t
-CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) { return 0; }
- // Keep parallel with CollationDataBuilder::getSingleCE().
- const CollationData *d;
- uint32_t ce32 = getCE32(c);
- if(ce32 == Collation::FALLBACK_CE32) {
- d = base;
- ce32 = base->getCE32(c);
- } else {
- d = this;
- }
- while(Collation::isSpecialCE32(ce32)) {
- switch(Collation::tagFromCE32(ce32)) {
- case Collation::LATIN_EXPANSION_TAG:
- case Collation::BUILDER_DATA_TAG:
- case Collation::PREFIX_TAG:
- case Collation::CONTRACTION_TAG:
- case Collation::HANGUL_TAG:
- case Collation::LEAD_SURROGATE_TAG:
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- case Collation::FALLBACK_TAG:
- case Collation::RESERVED_TAG_3:
- errorCode = U_INTERNAL_PROGRAM_ERROR;
- return 0;
- case Collation::LONG_PRIMARY_TAG:
- return Collation::ceFromLongPrimaryCE32(ce32);
- case Collation::LONG_SECONDARY_TAG:
- return Collation::ceFromLongSecondaryCE32(ce32);
- case Collation::EXPANSION32_TAG:
- if(Collation::lengthFromCE32(ce32) == 1) {
- ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
- break;
- } else {
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
- case Collation::EXPANSION_TAG: {
- if(Collation::lengthFromCE32(ce32) == 1) {
- return d->ces[Collation::indexFromCE32(ce32)];
- } else {
- errorCode = U_UNSUPPORTED_ERROR;
- return 0;
- }
- }
- case Collation::DIGIT_TAG:
- // Fetch the non-numeric-collation CE32 and continue.
- ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
- break;
- case Collation::U0000_TAG:
- U_ASSERT(c == 0);
- // Fetch the normal ce32 for U+0000 and continue.
- ce32 = d->ce32s[0];
- break;
- case Collation::OFFSET_TAG:
- return d->getCEFromOffsetCE32(c, ce32);
- case Collation::IMPLICIT_TAG:
- return Collation::unassignedCEFromCodePoint(c);
- }
- }
- return Collation::ceFromSimpleCE32(ce32);
-}
-
-uint32_t
-CollationData::getFirstPrimaryForGroup(int32_t script) const {
- int32_t index = getScriptIndex(script);
- return index == 0 ? 0 : (uint32_t)scriptStarts[index] << 16;
-}
-
-uint32_t
-CollationData::getLastPrimaryForGroup(int32_t script) const {
- int32_t index = getScriptIndex(script);
- if(index == 0) {
- return 0;
- }
- uint32_t limit = scriptStarts[index + 1];
- return (limit << 16) - 1;
-}
-
-int32_t
-CollationData::getGroupForPrimary(uint32_t p) const {
- p >>= 16;
- if(p < scriptStarts[1] || scriptStarts[scriptStartsLength - 1] <= p) {
- return -1;
- }
- int32_t index = 1;
- while(p >= scriptStarts[index + 1]) { ++index; }
- for(int32_t i = 0; i < numScripts; ++i) {
- if(scriptsIndex[i] == index) {
- return i;
- }
- }
- for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
- if(scriptsIndex[numScripts + i] == index) {
- return UCOL_REORDER_CODE_FIRST + i;
- }
- }
- return -1;
-}
-
-int32_t
-CollationData::getScriptIndex(int32_t script) const {
- if(script < 0) {
- return 0;
- } else if(script < numScripts) {
- return scriptsIndex[script];
- } else if(script < UCOL_REORDER_CODE_FIRST) {
- return 0;
- } else {
- script -= UCOL_REORDER_CODE_FIRST;
- if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
- return scriptsIndex[numScripts + script];
- } else {
- return 0;
- }
- }
-}
-
-int32_t
-CollationData::getEquivalentScripts(int32_t script,
- int32_t dest[], int32_t capacity,
- UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) { return 0; }
- int32_t index = getScriptIndex(script);
- if(index == 0) { return 0; }
- if(script >= UCOL_REORDER_CODE_FIRST) {
- // Special groups have no aliases.
- if(capacity > 0) {
- dest[0] = script;
- } else {
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- }
- return 1;
- }
-
- int32_t length = 0;
- for(int32_t i = 0; i < numScripts; ++i) {
- if(scriptsIndex[i] == index) {
- if(length < capacity) {
- dest[length] = i;
- }
- ++length;
- }
- }
- if(length > capacity) {
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- }
- return length;
-}
-
-void
-CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
- UVector32 &ranges, UErrorCode &errorCode) const {
- makeReorderRanges(reorder, length, FALSE, ranges, errorCode);
-}
-
-void
-CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
- UBool latinMustMove,
- UVector32 &ranges, UErrorCode &errorCode) const {
- if(U_FAILURE(errorCode)) { return; }
- ranges.removeAllElements();
- if(length == 0 || (length == 1 && reorder[0] == USCRIPT_UNKNOWN)) {
- return;
- }
-
- // Maps each script-or-group range to a new lead byte.
- uint8_t table[MAX_NUM_SCRIPT_RANGES];
- uprv_memset(table, 0, sizeof(table));
-
- {
- // Set "don't care" values for reserved ranges.
- int32_t index = scriptsIndex[
- numScripts + REORDER_RESERVED_BEFORE_LATIN - UCOL_REORDER_CODE_FIRST];
- if(index != 0) {
- table[index] = 0xff;
- }
- index = scriptsIndex[
- numScripts + REORDER_RESERVED_AFTER_LATIN - UCOL_REORDER_CODE_FIRST];
- if(index != 0) {
- table[index] = 0xff;
- }
- }
-
- // Never reorder special low and high primary lead bytes.
- U_ASSERT(scriptStartsLength >= 2);
- U_ASSERT(scriptStarts[0] == 0);
- int32_t lowStart = scriptStarts[1];
- U_ASSERT(lowStart == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8));
- int32_t highLimit = scriptStarts[scriptStartsLength - 1];
- U_ASSERT(highLimit == (Collation::TRAIL_WEIGHT_BYTE << 8));
-
- // Get the set of special reorder codes in the input list.
- // This supports a fixed number of special reorder codes;
- // it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.
- uint32_t specials = 0;
- for(int32_t i = 0; i < length; ++i) {
- int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;
- if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
- specials |= (uint32_t)1 << reorderCode;
- }
- }
-
- // Start the reordering with the special low reorder codes that do not occur in the input.
- for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
- int32_t index = scriptsIndex[numScripts + i];
- if(index != 0 && (specials & ((uint32_t)1 << i)) == 0) {
- lowStart = addLowScriptRange(table, index, lowStart);
- }
- }
-
- // Skip the reserved range before Latin if Latin is the first script,
- // so that we do not move it unnecessarily.
- int32_t skippedReserved = 0;
- if(specials == 0 && reorder[0] == USCRIPT_LATIN && !latinMustMove) {
- int32_t index = scriptsIndex[USCRIPT_LATIN];
- U_ASSERT(index != 0);
- int32_t start = scriptStarts[index];
- U_ASSERT(lowStart <= start);
- skippedReserved = start - lowStart;
- lowStart = start;
- }
-
- // Reorder according to the input scripts, continuing from the bottom of the primary range.
- int32_t originalLength = length; // length will be decremented if "others" is in the list.
- UBool hasReorderToEnd = FALSE;
- for(int32_t i = 0; i < length;) {
- int32_t script = reorder[i++];
- if(script == USCRIPT_UNKNOWN) {
- // Put the remaining scripts at the top.
- hasReorderToEnd = TRUE;
- while(i < length) {
- script = reorder[--length];
- if(script == USCRIPT_UNKNOWN || // Must occur at most once.
- script == UCOL_REORDER_CODE_DEFAULT) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- int32_t index = getScriptIndex(script);
- if(index == 0) { continue; }
- if(table[index] != 0) { // Duplicate or equivalent script.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- highLimit = addHighScriptRange(table, index, highLimit);
- }
- break;
- }
- if(script == UCOL_REORDER_CODE_DEFAULT) {
- // The default code must be the only one in the list, and that is handled by the caller.
- // Otherwise it must not be used.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- int32_t index = getScriptIndex(script);
- if(index == 0) { continue; }
- if(table[index] != 0) { // Duplicate or equivalent script.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- lowStart = addLowScriptRange(table, index, lowStart);
- }
-
- // Put all remaining scripts into the middle.
- for(int32_t i = 1; i < scriptStartsLength - 1; ++i) {
- int32_t leadByte = table[i];
- if(leadByte != 0) { continue; }
- int32_t start = scriptStarts[i];
- if(!hasReorderToEnd && start > lowStart) {
- // No need to move this script.
- lowStart = start;
- }
- lowStart = addLowScriptRange(table, i, lowStart);
- }
- if(lowStart > highLimit) {
- if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
- // Try not skipping the before-Latin reserved range.
- makeReorderRanges(reorder, originalLength, TRUE, ranges, errorCode);
- return;
- }
- // We need more primary lead bytes than available, despite the reserved ranges.
- errorCode = U_BUFFER_OVERFLOW_ERROR;
- return;
- }
-
- // Turn lead bytes into a list of (limit, offset) pairs.
- // Encode each pair in one list element:
- // Upper 16 bits = limit, lower 16 = signed lead byte offset.
- int32_t offset = 0;
- for(int32_t i = 1;; ++i) {
- int32_t nextOffset = offset;
- while(i < scriptStartsLength - 1) {
- int32_t newLeadByte = table[i];
- if(newLeadByte == 0xff) {
- // "Don't care" lead byte for reserved range, continue with current offset.
- } else {
- nextOffset = newLeadByte - (scriptStarts[i] >> 8);
- if(nextOffset != offset) { break; }
- }
- ++i;
- }
- if(offset != 0 || i < scriptStartsLength - 1) {
- ranges.addElement(((int32_t)scriptStarts[i] << 16) | (offset & 0xffff), errorCode);
- }
- if(i == scriptStartsLength - 1) { break; }
- offset = nextOffset;
- }
-}
-
-int32_t
-CollationData::addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const {
- int32_t start = scriptStarts[index];
- if((start & 0xff) < (lowStart & 0xff)) {
- lowStart += 0x100;
- }
- table[index] = (uint8_t)(lowStart >> 8);
- int32_t limit = scriptStarts[index + 1];
- lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff);
- return lowStart;
-}
-
-int32_t
-CollationData::addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const {
- int32_t limit = scriptStarts[index + 1];
- if((limit & 0xff) > (highLimit & 0xff)) {
- highLimit -= 0x100;
- }
- int32_t start = scriptStarts[index];
- highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff);
- table[index] = (uint8_t)(highLimit >> 8);
- return highLimit;
-}
-
-U_NAMESPACE_END
-
-#endif // !UCONFIG_NO_COLLATION
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2012-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* collationdata.cpp
+*
+* created on: 2012jul28
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/ucol.h"
+#include "unicode/udata.h"
+#include "unicode/uscript.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "collationdata.h"
+#include "uassert.h"
+#include "utrie2.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+uint32_t
+CollationData::getIndirectCE32(uint32_t ce32) const {
+ U_ASSERT(Collation::isSpecialCE32(ce32));
+ int32_t tag = Collation::tagFromCE32(ce32);
+ if(tag == Collation::DIGIT_TAG) {
+ // Fetch the non-numeric-collation CE32.
+ ce32 = ce32s[Collation::indexFromCE32(ce32)];
+ } else if(tag == Collation::LEAD_SURROGATE_TAG) {
+ ce32 = Collation::UNASSIGNED_CE32;
+ } else if(tag == Collation::U0000_TAG) {
+ // Fetch the normal ce32 for U+0000.
+ ce32 = ce32s[0];
+ }
+ return ce32;
+}
+
+uint32_t
+CollationData::getFinalCE32(uint32_t ce32) const {
+ if(Collation::isSpecialCE32(ce32)) {
+ ce32 = getIndirectCE32(ce32);
+ }
+ return ce32;
+}
+
+int64_t
+CollationData::getSingleCE(UChar32 c, UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) { return 0; }
+ // Keep parallel with CollationDataBuilder::getSingleCE().
+ const CollationData *d;
+ uint32_t ce32 = getCE32(c);
+ if(ce32 == Collation::FALLBACK_CE32) {
+ d = base;
+ ce32 = base->getCE32(c);
+ } else {
+ d = this;
+ }
+ while(Collation::isSpecialCE32(ce32)) {
+ switch(Collation::tagFromCE32(ce32)) {
+ case Collation::LATIN_EXPANSION_TAG:
+ case Collation::BUILDER_DATA_TAG:
+ case Collation::PREFIX_TAG:
+ case Collation::CONTRACTION_TAG:
+ case Collation::HANGUL_TAG:
+ case Collation::LEAD_SURROGATE_TAG:
+ errorCode = U_UNSUPPORTED_ERROR;
+ return 0;
+ case Collation::FALLBACK_TAG:
+ case Collation::RESERVED_TAG_3:
+ errorCode = U_INTERNAL_PROGRAM_ERROR;
+ return 0;
+ case Collation::LONG_PRIMARY_TAG:
+ return Collation::ceFromLongPrimaryCE32(ce32);
+ case Collation::LONG_SECONDARY_TAG:
+ return Collation::ceFromLongSecondaryCE32(ce32);
+ case Collation::EXPANSION32_TAG:
+ if(Collation::lengthFromCE32(ce32) == 1) {
+ ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
+ break;
+ } else {
+ errorCode = U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+ case Collation::EXPANSION_TAG: {
+ if(Collation::lengthFromCE32(ce32) == 1) {
+ return d->ces[Collation::indexFromCE32(ce32)];
+ } else {
+ errorCode = U_UNSUPPORTED_ERROR;
+ return 0;
+ }
+ }
+ case Collation::DIGIT_TAG:
+ // Fetch the non-numeric-collation CE32 and continue.
+ ce32 = d->ce32s[Collation::indexFromCE32(ce32)];
+ break;
+ case Collation::U0000_TAG:
+ U_ASSERT(c == 0);
+ // Fetch the normal ce32 for U+0000 and continue.
+ ce32 = d->ce32s[0];
+ break;
+ case Collation::OFFSET_TAG:
+ return d->getCEFromOffsetCE32(c, ce32);
+ case Collation::IMPLICIT_TAG:
+ return Collation::unassignedCEFromCodePoint(c);
+ }
+ }
+ return Collation::ceFromSimpleCE32(ce32);
+}
+
+uint32_t
+CollationData::getFirstPrimaryForGroup(int32_t script) const {
+ int32_t index = getScriptIndex(script);
+ return index == 0 ? 0 : (uint32_t)scriptStarts[index] << 16;
+}
+
+uint32_t
+CollationData::getLastPrimaryForGroup(int32_t script) const {
+ int32_t index = getScriptIndex(script);
+ if(index == 0) {
+ return 0;
+ }
+ uint32_t limit = scriptStarts[index + 1];
+ return (limit << 16) - 1;
+}
+
+int32_t
+CollationData::getGroupForPrimary(uint32_t p) const {
+ p >>= 16;
+ if(p < scriptStarts[1] || scriptStarts[scriptStartsLength - 1] <= p) {
+ return -1;
+ }
+ int32_t index = 1;
+ while(p >= scriptStarts[index + 1]) { ++index; }
+ for(int32_t i = 0; i < numScripts; ++i) {
+ if(scriptsIndex[i] == index) {
+ return i;
+ }
+ }
+ for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
+ if(scriptsIndex[numScripts + i] == index) {
+ return UCOL_REORDER_CODE_FIRST + i;
+ }
+ }
+ return -1;
+}
+
+int32_t
+CollationData::getScriptIndex(int32_t script) const {
+ if(script < 0) {
+ return 0;
+ } else if(script < numScripts) {
+ return scriptsIndex[script];
+ } else if(script < UCOL_REORDER_CODE_FIRST) {
+ return 0;
+ } else {
+ script -= UCOL_REORDER_CODE_FIRST;
+ if(script < MAX_NUM_SPECIAL_REORDER_CODES) {
+ return scriptsIndex[numScripts + script];
+ } else {
+ return 0;
+ }
+ }
+}
+
+int32_t
+CollationData::getEquivalentScripts(int32_t script,
+ int32_t dest[], int32_t capacity,
+ UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) { return 0; }
+ int32_t index = getScriptIndex(script);
+ if(index == 0) { return 0; }
+ if(script >= UCOL_REORDER_CODE_FIRST) {
+ // Special groups have no aliases.
+ if(capacity > 0) {
+ dest[0] = script;
+ } else {
+ errorCode = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return 1;
+ }
+
+ int32_t length = 0;
+ for(int32_t i = 0; i < numScripts; ++i) {
+ if(scriptsIndex[i] == index) {
+ if(length < capacity) {
+ dest[length] = i;
+ }
+ ++length;
+ }
+ }
+ if(length > capacity) {
+ errorCode = U_BUFFER_OVERFLOW_ERROR;
+ }
+ return length;
+}
+
+void
+CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
+ UVector32 &ranges, UErrorCode &errorCode) const {
+ makeReorderRanges(reorder, length, FALSE, ranges, errorCode);
+}
+
+void
+CollationData::makeReorderRanges(const int32_t *reorder, int32_t length,
+ UBool latinMustMove,
+ UVector32 &ranges, UErrorCode &errorCode) const {
+ if(U_FAILURE(errorCode)) { return; }
+ ranges.removeAllElements();
+ if(length == 0 || (length == 1 && reorder[0] == USCRIPT_UNKNOWN)) {
+ return;
+ }
+
+ // Maps each script-or-group range to a new lead byte.
+ uint8_t table[MAX_NUM_SCRIPT_RANGES];
+ uprv_memset(table, 0, sizeof(table));
+
+ {
+ // Set "don't care" values for reserved ranges.
+ int32_t index = scriptsIndex[
+ numScripts + REORDER_RESERVED_BEFORE_LATIN - UCOL_REORDER_CODE_FIRST];
+ if(index != 0) {
+ table[index] = 0xff;
+ }
+ index = scriptsIndex[
+ numScripts + REORDER_RESERVED_AFTER_LATIN - UCOL_REORDER_CODE_FIRST];
+ if(index != 0) {
+ table[index] = 0xff;
+ }
+ }
+
+ // Never reorder special low and high primary lead bytes.
+ U_ASSERT(scriptStartsLength >= 2);
+ U_ASSERT(scriptStarts[0] == 0);
+ int32_t lowStart = scriptStarts[1];
+ U_ASSERT(lowStart == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8));
+ int32_t highLimit = scriptStarts[scriptStartsLength - 1];
+ U_ASSERT(highLimit == (Collation::TRAIL_WEIGHT_BYTE << 8));
+
+ // Get the set of special reorder codes in the input list.
+ // This supports a fixed number of special reorder codes;
+ // it works for data with codes beyond UCOL_REORDER_CODE_LIMIT.
+ uint32_t specials = 0;
+ for(int32_t i = 0; i < length; ++i) {
+ int32_t reorderCode = reorder[i] - UCOL_REORDER_CODE_FIRST;
+ if(0 <= reorderCode && reorderCode < MAX_NUM_SPECIAL_REORDER_CODES) {
+ specials |= (uint32_t)1 << reorderCode;
+ }
+ }
+
+ // Start the reordering with the special low reorder codes that do not occur in the input.
+ for(int32_t i = 0; i < MAX_NUM_SPECIAL_REORDER_CODES; ++i) {
+ int32_t index = scriptsIndex[numScripts + i];
+ if(index != 0 && (specials & ((uint32_t)1 << i)) == 0) {
+ lowStart = addLowScriptRange(table, index, lowStart);
+ }
+ }
+
+ // Skip the reserved range before Latin if Latin is the first script,
+ // so that we do not move it unnecessarily.
+ int32_t skippedReserved = 0;
+ if(specials == 0 && reorder[0] == USCRIPT_LATIN && !latinMustMove) {
+ int32_t index = scriptsIndex[USCRIPT_LATIN];
+ U_ASSERT(index != 0);
+ int32_t start = scriptStarts[index];
+ U_ASSERT(lowStart <= start);
+ skippedReserved = start - lowStart;
+ lowStart = start;
+ }
+
+ // Reorder according to the input scripts, continuing from the bottom of the primary range.
+ int32_t originalLength = length; // length will be decremented if "others" is in the list.
+ UBool hasReorderToEnd = FALSE;
+ for(int32_t i = 0; i < length;) {
+ int32_t script = reorder[i++];
+ if(script == USCRIPT_UNKNOWN) {
+ // Put the remaining scripts at the top.
+ hasReorderToEnd = TRUE;
+ while(i < length) {
+ script = reorder[--length];
+ if(script == USCRIPT_UNKNOWN || // Must occur at most once.
+ script == UCOL_REORDER_CODE_DEFAULT) {
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ int32_t index = getScriptIndex(script);
+ if(index == 0) { continue; }
+ if(table[index] != 0) { // Duplicate or equivalent script.
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ highLimit = addHighScriptRange(table, index, highLimit);
+ }
+ break;
+ }
+ if(script == UCOL_REORDER_CODE_DEFAULT) {
+ // The default code must be the only one in the list, and that is handled by the caller.
+ // Otherwise it must not be used.
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ int32_t index = getScriptIndex(script);
+ if(index == 0) { continue; }
+ if(table[index] != 0) { // Duplicate or equivalent script.
+ errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+ return;
+ }
+ lowStart = addLowScriptRange(table, index, lowStart);
+ }
+
+ // Put all remaining scripts into the middle.
+ for(int32_t i = 1; i < scriptStartsLength - 1; ++i) {
+ int32_t leadByte = table[i];
+ if(leadByte != 0) { continue; }
+ int32_t start = scriptStarts[i];
+ if(!hasReorderToEnd && start > lowStart) {
+ // No need to move this script.
+ lowStart = start;
+ }
+ lowStart = addLowScriptRange(table, i, lowStart);
+ }
+ if(lowStart > highLimit) {
+ if((lowStart - (skippedReserved & 0xff00)) <= highLimit) {
+ // Try not skipping the before-Latin reserved range.
+ makeReorderRanges(reorder, originalLength, TRUE, ranges, errorCode);
+ return;
+ }
+ // We need more primary lead bytes than available, despite the reserved ranges.
+ errorCode = U_BUFFER_OVERFLOW_ERROR;
+ return;
+ }
+
+ // Turn lead bytes into a list of (limit, offset) pairs.
+ // Encode each pair in one list element:
+ // Upper 16 bits = limit, lower 16 = signed lead byte offset.
+ int32_t offset = 0;
+ for(int32_t i = 1;; ++i) {
+ int32_t nextOffset = offset;
+ while(i < scriptStartsLength - 1) {
+ int32_t newLeadByte = table[i];
+ if(newLeadByte == 0xff) {
+ // "Don't care" lead byte for reserved range, continue with current offset.
+ } else {
+ nextOffset = newLeadByte - (scriptStarts[i] >> 8);
+ if(nextOffset != offset) { break; }
+ }
+ ++i;
+ }
+ if(offset != 0 || i < scriptStartsLength - 1) {
+ ranges.addElement(((int32_t)scriptStarts[i] << 16) | (offset & 0xffff), errorCode);
+ }
+ if(i == scriptStartsLength - 1) { break; }
+ offset = nextOffset;
+ }
+}
+
+int32_t
+CollationData::addLowScriptRange(uint8_t table[], int32_t index, int32_t lowStart) const {
+ int32_t start = scriptStarts[index];
+ if((start & 0xff) < (lowStart & 0xff)) {
+ lowStart += 0x100;
+ }
+ table[index] = (uint8_t)(lowStart >> 8);
+ int32_t limit = scriptStarts[index + 1];
+ lowStart = ((lowStart & 0xff00) + ((limit & 0xff00) - (start & 0xff00))) | (limit & 0xff);
+ return lowStart;
+}
+
+int32_t
+CollationData::addHighScriptRange(uint8_t table[], int32_t index, int32_t highLimit) const {
+ int32_t limit = scriptStarts[index + 1];
+ if((limit & 0xff) > (highLimit & 0xff)) {
+ highLimit -= 0x100;
+ }
+ int32_t start = scriptStarts[index];
+ highLimit = ((highLimit & 0xff00) - ((limit & 0xff00) - (start & 0xff00))) | (start & 0xff);
+ table[index] = (uint8_t)(highLimit >> 8);
+ return highLimit;
+}
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION