diff options
author | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-09 20:07:20 +0300 |
---|---|---|
committer | romankoshelev <romankoshelev@yandex-team.com> | 2023-08-09 20:59:13 +0300 |
commit | fd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch) | |
tree | f582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/common/brkeng.cpp | |
parent | bf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff) | |
download | ydb-fd82fb12fb45e71a02c628e45b12c50c0dd0d308.tar.gz |
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/common/brkeng.cpp')
-rw-r--r-- | contrib/libs/icu/common/brkeng.cpp | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/contrib/libs/icu/common/brkeng.cpp b/contrib/libs/icu/common/brkeng.cpp index 78492db662..52e9c53621 100644 --- a/contrib/libs/icu/common/brkeng.cpp +++ b/contrib/libs/icu/common/brkeng.cpp @@ -25,6 +25,7 @@ #include "brkeng.h" #include "cmemory.h" #include "dictbe.h" +#include "lstmbe.h" #include "charstr.h" #include "dictionarydata.h" #include "mutex.h" @@ -77,7 +78,9 @@ int32_t UnhandledEngine::findBreaks( UText *text, int32_t /* startPos */, int32_t endPos, - UVector32 &/*foundBreaks*/ ) const { + UVector32 &/*foundBreaks*/, + UErrorCode &status) const { + if (U_FAILURE(status)) return 0; UChar32 c = utext_current32(text); while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) { utext_next32(text); // TODO: recast loop to work with post-increment operations. @@ -132,14 +135,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { static UMutex gBreakEngineMutex; Mutex m(&gBreakEngineMutex); - if (fEngines == NULL) { - UStack *engines = new UStack(_deleteEngine, NULL, status); - if (U_FAILURE(status) || engines == NULL) { + if (fEngines == nullptr) { + LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status); + if (U_FAILURE(status) ) { // Note: no way to return error code to caller. - delete engines; - return NULL; + return nullptr; } - fEngines = engines; + fEngines = engines.orphan(); } else { int32_t i = fEngines->size(); while (--i >= 0) { @@ -152,10 +154,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) { // We didn't find an engine. Create one. lbe = loadEngineFor(c); - if (lbe != NULL) { + if (lbe != nullptr) { fEngines->push((void *)lbe, status); } - return lbe; + return U_SUCCESS(status) ? lbe : nullptr; } const LanguageBreakEngine * @@ -163,9 +165,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) { UErrorCode status = U_ZERO_ERROR; UScriptCode code = uscript_getScript(c, &status); if (U_SUCCESS(status)) { + const LanguageBreakEngine *engine = nullptr; + // Try to use LSTM first + const LSTMData *data = CreateLSTMDataForScript(code, status); + if (U_SUCCESS(status)) { + if (data != nullptr) { + engine = CreateLSTMBreakEngine(code, data, status); + if (U_SUCCESS(status) && engine != nullptr) { + return engine; + } + if (engine != nullptr) { + delete engine; + engine = nullptr; + } else { + DeleteLSTMData(data); + } + } + } + status = U_ZERO_ERROR; // fallback to dictionary based DictionaryMatcher *m = loadDictionaryMatcherFor(code); if (m != NULL) { - const LanguageBreakEngine *engine = NULL; switch(code) { case USCRIPT_THAI: engine = new ThaiBreakEngine(m, status); |