diff options
author | romankoshelev <[email protected]> | 2023-08-09 20:07:20 +0300 |
---|---|---|
committer | romankoshelev <[email protected]> | 2023-08-09 20:59:13 +0300 |
commit | fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch) | |
tree | f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/common/rbbirb.cpp | |
parent | bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff) |
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/common/rbbirb.cpp')
-rw-r--r-- | contrib/libs/icu/common/rbbirb.cpp | 36 |
1 files changed, 28 insertions, 8 deletions
diff --git a/contrib/libs/icu/common/rbbirb.cpp b/contrib/libs/icu/common/rbbirb.cpp index 68ded32e1d0..e5c250dfe40 100644 --- a/contrib/libs/icu/common/rbbirb.cpp +++ b/contrib/libs/icu/common/rbbirb.cpp @@ -22,6 +22,7 @@ #include "unicode/uniset.h" #include "unicode/uchar.h" #include "unicode/uchriter.h" +#include "unicode/ustring.h" #include "unicode/parsepos.h" #include "unicode/parseerr.h" @@ -154,7 +155,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize()); int32_t trieSize = align8(fSetBuilder->getTrieSize()); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); + + int32_t rulesLengthInUTF8 = 0; + u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8, + fStrippedRules.getBuffer(), fStrippedRules.length(), + 0xfffd, nullptr, fStatus); + *fStatus = U_ZERO_ERROR; + + int32_t rulesSize = align8((rulesLengthInUTF8+1)); int32_t totalSize = headerSize + forwardTableSize @@ -197,11 +205,11 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fRTableLen = reverseTableSize; data->fTrie = data->fRTable + data->fRTableLen; - data->fTrieLen = fSetBuilder->getTrieSize(); - data->fStatusTable = data->fTrie + trieSize; + data->fTrieLen = trieSize; + data->fStatusTable = data->fTrie + data->fTrieLen; data->fStatusTableLen= statusTableSize; data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); + data->fRuleSourceLen = rulesLengthInUTF8; uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); @@ -214,7 +222,12 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { ruleStatusTable[i] = fRuleStatusVals->elementAti(i); } - fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); + u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8, + fStrippedRules.getBuffer(), fStrippedRules.length(), + 0xfffd, nullptr, fStatus); + if (U_FAILURE(*fStatus)) { + return NULL; + } return data; } @@ -274,9 +287,7 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { // // UnicodeSet processing. - // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input code points to - // the character categories. + // Munge the Unicode Sets to create an initial set of character categories. // fSetBuilder->buildRanges(); @@ -290,6 +301,12 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { } fForwardTable->buildForwardTable(); + + // State table and character category optimization. + // Merge equivalent rows and columns. + // Note that this process alters the initial set of character categories, + // causing the representation of UnicodeSets in the parse tree to become invalid. + optimizeTables(); fForwardTable->buildSafeReverseTable(status); @@ -302,6 +319,9 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) { } #endif + // Generate the mapping tables (TRIE) from input code points to + // the character categories. + // fSetBuilder->buildTrie(); // |