summary refs log tree commit diff stats
path: root/contrib/libs/icu/common/rbbirb.cpp
diff options
context:
space:
mode:
author romankoshelev <[email protected]> 2023-08-09 20:07:20 +0300
committer romankoshelev <[email protected]> 2023-08-09 20:59:13 +0300
commit fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch)
tree f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/common/rbbirb.cpp
parent bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff)
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/common/rbbirb.cpp')
-rw-r--r-- contrib/libs/icu/common/rbbirb.cpp 36
1 files changed, 28 insertions, 8 deletions
diff --git a/contrib/libs/icu/common/rbbirb.cpp b/contrib/libs/icu/common/rbbirb.cpp
index 68ded32e1d0..e5c250dfe40 100644
--- a/contrib/libs/icu/common/rbbirb.cpp
+++ b/contrib/libs/icu/common/rbbirb.cpp
@@ -22,6 +22,7 @@
#include "unicode/uniset.h"
#include "unicode/uchar.h"
#include "unicode/uchriter.h"
+#include "unicode/ustring.h"
#include "unicode/parsepos.h"
#include "unicode/parseerr.h"
@@ -154,7 +155,14 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
- int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
+
+ int32_t rulesLengthInUTF8 = 0;
+ u_strToUTF8WithSub(0, 0, &rulesLengthInUTF8,
+ fStrippedRules.getBuffer(), fStrippedRules.length(),
+ 0xfffd, nullptr, fStatus);
+ *fStatus = U_ZERO_ERROR;
+
+ int32_t rulesSize = align8((rulesLengthInUTF8+1));
int32_t totalSize = headerSize
+ forwardTableSize
@@ -197,11 +205,11 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fRTableLen = reverseTableSize;
data->fTrie = data->fRTable + data->fRTableLen;
- data->fTrieLen = fSetBuilder->getTrieSize();
- data->fStatusTable = data->fTrie + trieSize;
+ data->fTrieLen = trieSize;
+ data->fStatusTable = data->fTrie + data->fTrieLen;
data->fStatusTableLen= statusTableSize;
data->fRuleSource = data->fStatusTable + statusTableSize;
- data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
+ data->fRuleSourceLen = rulesLengthInUTF8;
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
@@ -214,7 +222,12 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
}
- fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
+ u_strToUTF8WithSub((char *)data+data->fRuleSource, rulesSize, &rulesLengthInUTF8,
+ fStrippedRules.getBuffer(), fStrippedRules.length(),
+ 0xfffd, nullptr, fStatus);
+ if (U_FAILURE(*fStatus)) {
+ return NULL;
+ }
return data;
}
@@ -274,9 +287,7 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
//
// UnicodeSet processing.
- // Munge the Unicode Sets to create a set of character categories.
- // Generate the mapping tables (TRIE) from input code points to
- // the character categories.
+ // Munge the Unicode Sets to create an initial set of character categories.
//
fSetBuilder->buildRanges();
@@ -290,6 +301,12 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
}
fForwardTable->buildForwardTable();
+
+ // State table and character category optimization.
+ // Merge equivalent rows and columns.
+ // Note that this process alters the initial set of character categories,
+ // causing the representation of UnicodeSets in the parse tree to become invalid.
+
optimizeTables();
fForwardTable->buildSafeReverseTable(status);
@@ -302,6 +319,9 @@ RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
}
#endif
+ // Generate the mapping tables (TRIE) from input code points to
+ // the character categories.
+ //
fSetBuilder->buildTrie();
//