aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/lstmbe.cpp
diff options
context:
space:
mode:
authorromankoshelev <romankoshelev@yandex-team.com>2023-08-14 19:51:50 +0300
committerromankoshelev <romankoshelev@yandex-team.com>2023-08-15 01:24:11 +0300
commitcfcd865e05c0d0525ea27d1e153a043b32a85138 (patch)
tree68d3b3b25271e8a4998505897a269ff7ce119b76 /contrib/libs/icu/common/lstmbe.cpp
parentccb790c507bd5e8ffe2ef9886ce5ee0a7ce22a15 (diff)
downloadydb-cfcd865e05c0d0525ea27d1e153a043b32a85138.tar.gz
Update ICU to 73.2
Diffstat (limited to 'contrib/libs/icu/common/lstmbe.cpp')
-rw-r--r--contrib/libs/icu/common/lstmbe.cpp21
1 files changed, 11 insertions, 10 deletions
diff --git a/contrib/libs/icu/common/lstmbe.cpp b/contrib/libs/icu/common/lstmbe.cpp
index 3793abceb3..fb8eb01761 100644
--- a/contrib/libs/icu/common/lstmbe.cpp
+++ b/contrib/libs/icu/common/lstmbe.cpp
@@ -1,8 +1,8 @@
// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
+#include <complex>
#include <utility>
-#include <ctgmath>
#include "unicode/utypes.h"
@@ -361,7 +361,7 @@ struct LSTMData : public UMemory {
~LSTMData();
UHashtable* fDict;
EmbeddingType fType;
- const UChar* fName;
+ const char16_t* fName;
ConstArray2D fEmbedding;
ConstArray2D fForwardW;
ConstArray2D fForwardU;
@@ -394,7 +394,7 @@ LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status)
ures_getByKey(rb, "hunits", nullptr, &status));
if (U_FAILURE(status)) return;
int32_t hunits = ures_getInt(hunits_res.getAlias(), &status);
- const UChar* type = ures_getStringByKey(rb, "type", nullptr, &status);
+ const char16_t* type = ures_getStringByKey(rb, "type", nullptr, &status);
if (U_FAILURE(status)) return;
if (u_strCompare(type, -1, u"codepoints", -1, false) == 0) {
fType = CODE_POINTS;
@@ -419,7 +419,7 @@ LSTMData::LSTMData(UResourceBundle* rb, UErrorCode &status)
int32_t stringLength;
for (int32_t idx = 0; idx < num_index; idx++) {
stringArray.getValue(idx, value);
- const UChar* str = value.getString(stringLength, status);
+ const char16_t* str = value.getString(stringLength, status);
uhash_putiAllowZero(fDict, (void*)str, idx, &status);
if (U_FAILURE(status)) return;
#ifdef LSTM_VECTORIZER_DEBUG
@@ -477,7 +477,7 @@ public:
UVector32 &offsets, UVector32 &indices,
UErrorCode &status) const = 0;
protected:
- int32_t stringToIndex(const UChar* str) const {
+ int32_t stringToIndex(const char16_t* str) const {
UBool found = false;
int32_t ret = uhash_getiAndFound(fDict, (const void*)str, &found);
if (!found) {
@@ -524,13 +524,13 @@ void CodePointsVectorizer::vectorize(
if (U_FAILURE(status)) return;
utext_setNativeIndex(text, startPos);
int32_t current;
- UChar str[2] = {0, 0};
+ char16_t str[2] = {0, 0};
while (U_SUCCESS(status) &&
(current = (int32_t)utext_getNativeIndex(text)) < endPos) {
// Since the LSTMBreakEngine is currently only accept chars in BMP,
// we can ignore the possibility of hitting supplementary code
// point.
- str[0] = (UChar) utext_next32(text);
+ str[0] = (char16_t) utext_next32(text);
U_ASSERT(!U_IS_SURROGATE(str[0]));
offsets.addElement(current, status);
indices.addElement(stringToIndex(str), status);
@@ -576,7 +576,7 @@ void GraphemeClusterVectorizer::vectorize(
}
int32_t last = startPos;
int32_t current = startPos;
- UChar str[MAX_GRAPHEME_CLSTER_LENGTH];
+ char16_t str[MAX_GRAPHEME_CLSTER_LENGTH];
while ((current = graphemeIter->next()) != BreakIterator::DONE) {
if (current >= endPos) {
break;
@@ -639,6 +639,7 @@ LSTMBreakEngine::divideUpDictionaryRange( UText *text,
int32_t startPos,
int32_t endPos,
UVector32 &foundBreaks,
+ UBool /* isPhraseBreaking */,
UErrorCode& status) const {
if (U_FAILURE(status)) return 0;
int32_t beginFoundBreakSize = foundBreaks.size();
@@ -776,7 +777,7 @@ LSTMBreakEngine::~LSTMBreakEngine() {
delete fVectorizer;
}
-const UChar* LSTMBreakEngine::name() const {
+const char16_t* LSTMBreakEngine::name() const {
return fData->fName;
}
@@ -845,7 +846,7 @@ U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data)
delete data;
}
-U_CAPI const UChar* U_EXPORT2 LSTMDataName(const LSTMData* data)
+U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data)
{
return data->fName;
}