aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/brkeng.cpp
diff options
context:
space:
mode:
authorromankoshelev <romankoshelev@yandex-team.com>2023-08-09 20:07:20 +0300
committerromankoshelev <romankoshelev@yandex-team.com>2023-08-09 20:59:13 +0300
commitfd82fb12fb45e71a02c628e45b12c50c0dd0d308 (patch)
treef582b79f9002ab1d083e9acda600dfb3551c47b6 /contrib/libs/icu/common/brkeng.cpp
parentbf862ddf5c6178e1bb5e4fb3f7c61015deebe284 (diff)
downloadydb-fd82fb12fb45e71a02c628e45b12c50c0dd0d308.tar.gz
Update ICU to 70.1
Diffstat (limited to 'contrib/libs/icu/common/brkeng.cpp')
-rw-r--r--contrib/libs/icu/common/brkeng.cpp39
1 files changed, 29 insertions, 10 deletions
diff --git a/contrib/libs/icu/common/brkeng.cpp b/contrib/libs/icu/common/brkeng.cpp
index 78492db662..52e9c53621 100644
--- a/contrib/libs/icu/common/brkeng.cpp
+++ b/contrib/libs/icu/common/brkeng.cpp
@@ -25,6 +25,7 @@
#include "brkeng.h"
#include "cmemory.h"
#include "dictbe.h"
+#include "lstmbe.h"
#include "charstr.h"
#include "dictionarydata.h"
#include "mutex.h"
@@ -77,7 +78,9 @@ int32_t
UnhandledEngine::findBreaks( UText *text,
int32_t /* startPos */,
int32_t endPos,
- UVector32 &/*foundBreaks*/ ) const {
+ UVector32 &/*foundBreaks*/,
+ UErrorCode &status) const {
+ if (U_FAILURE(status)) return 0;
UChar32 c = utext_current32(text);
while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
utext_next32(text); // TODO: recast loop to work with post-increment operations.
@@ -132,14 +135,13 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
static UMutex gBreakEngineMutex;
Mutex m(&gBreakEngineMutex);
- if (fEngines == NULL) {
- UStack *engines = new UStack(_deleteEngine, NULL, status);
- if (U_FAILURE(status) || engines == NULL) {
+ if (fEngines == nullptr) {
+ LocalPointer<UStack> engines(new UStack(_deleteEngine, nullptr, status), status);
+ if (U_FAILURE(status) ) {
// Note: no way to return error code to caller.
- delete engines;
- return NULL;
+ return nullptr;
}
- fEngines = engines;
+ fEngines = engines.orphan();
} else {
int32_t i = fEngines->size();
while (--i >= 0) {
@@ -152,10 +154,10 @@ ICULanguageBreakFactory::getEngineFor(UChar32 c) {
// We didn't find an engine. Create one.
lbe = loadEngineFor(c);
- if (lbe != NULL) {
+ if (lbe != nullptr) {
fEngines->push((void *)lbe, status);
}
- return lbe;
+ return U_SUCCESS(status) ? lbe : nullptr;
}
const LanguageBreakEngine *
@@ -163,9 +165,26 @@ ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
UErrorCode status = U_ZERO_ERROR;
UScriptCode code = uscript_getScript(c, &status);
if (U_SUCCESS(status)) {
+ const LanguageBreakEngine *engine = nullptr;
+ // Try to use LSTM first
+ const LSTMData *data = CreateLSTMDataForScript(code, status);
+ if (U_SUCCESS(status)) {
+ if (data != nullptr) {
+ engine = CreateLSTMBreakEngine(code, data, status);
+ if (U_SUCCESS(status) && engine != nullptr) {
+ return engine;
+ }
+ if (engine != nullptr) {
+ delete engine;
+ engine = nullptr;
+ } else {
+ DeleteLSTMData(data);
+ }
+ }
+ }
+ status = U_ZERO_ERROR; // fallback to dictionary based
DictionaryMatcher *m = loadDictionaryMatcherFor(code);
if (m != NULL) {
- const LanguageBreakEngine *engine = NULL;
switch(code) {
case USCRIPT_THAI:
engine = new ThaiBreakEngine(m, status);