Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:33 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:33 +0300
commit: 1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree: b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/brkeng.cpp
parent: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download: ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
1 files changed, 248 insertions, 248 deletions
diff --git a/contrib/libs/icu/common/brkeng.cpp b/contrib/libs/icu/common/brkeng.cpp
index 6392240a03..78492db662 100644
--- a/contrib/libs/icu/common/brkeng.cpp
+++ b/contrib/libs/icu/common/brkeng.cpp
@@ -1,80 +1,80 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html 
-/* 
- ************************************************************************************ 
- * Copyright (C) 2006-2016, International Business Machines Corporation 
- * and others. All Rights Reserved. 
- ************************************************************************************ 
- */ 
- 
-#include "unicode/utypes.h" 
- 
-#if !UCONFIG_NO_BREAK_ITERATION 
- 
-#include "unicode/uchar.h" 
-#include "unicode/uniset.h" 
-#include "unicode/chariter.h" 
-#include "unicode/ures.h" 
-#include "unicode/udata.h" 
-#include "unicode/putil.h" 
-#include "unicode/ustring.h" 
-#include "unicode/uscript.h" 
-#include "unicode/ucharstrie.h" 
-#include "unicode/bytestrie.h" 
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ************************************************************************************
+ * Copyright (C) 2006-2016, International Business Machines Corporation
+ * and others. All Rights Reserved.
+ ************************************************************************************
+ */
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "unicode/chariter.h"
+#include "unicode/ures.h"
+#include "unicode/udata.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "unicode/uscript.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/bytestrie.h"
 
 #include "brkeng.h"
 #include "cmemory.h"
 #include "dictbe.h"
-#include "charstr.h" 
-#include "dictionarydata.h" 
-#include "mutex.h" 
-#include "uvector.h" 
-#include "umutex.h" 
-#include "uresimp.h" 
-#include "ubrkimpl.h" 
- 
-U_NAMESPACE_BEGIN 
- 
-/* 
- ****************************************************************** 
- */ 
- 
-LanguageBreakEngine::LanguageBreakEngine() { 
-} 
- 
-LanguageBreakEngine::~LanguageBreakEngine() { 
-} 
- 
-/* 
- ****************************************************************** 
- */ 
- 
-LanguageBreakFactory::LanguageBreakFactory() { 
-} 
- 
-LanguageBreakFactory::~LanguageBreakFactory() { 
-} 
- 
-/* 
- ****************************************************************** 
- */ 
- 
+#include "charstr.h"
+#include "dictionarydata.h"
+#include "mutex.h"
+#include "uvector.h"
+#include "umutex.h"
+#include "uresimp.h"
+#include "ubrkimpl.h"
+
+U_NAMESPACE_BEGIN
+
+/*
+ ******************************************************************
+ */
+
+LanguageBreakEngine::LanguageBreakEngine() {
+}
+
+LanguageBreakEngine::~LanguageBreakEngine() {
+}
+
+/*
+ ******************************************************************
+ */
+
+LanguageBreakFactory::LanguageBreakFactory() {
+}
+
+LanguageBreakFactory::~LanguageBreakFactory() {
+}
+
+/*
+ ******************************************************************
+ */
+
 UnhandledEngine::UnhandledEngine(UErrorCode &status) : fHandled(nullptr) {
     (void)status;
-} 
- 
-UnhandledEngine::~UnhandledEngine() { 
+}
+
+UnhandledEngine::~UnhandledEngine() {
     delete fHandled;
     fHandled = nullptr;
-} 
- 
-UBool 
+}
+
+UBool
 UnhandledEngine::handles(UChar32 c) const {
     return fHandled && fHandled->contains(c);
-} 
- 
-int32_t 
-UnhandledEngine::findBreaks( UText *text, 
+}
+
+int32_t
+UnhandledEngine::findBreaks( UText *text,
                              int32_t /* startPos */,
                              int32_t endPos,
                              UVector32 &/*foundBreaks*/ ) const {
@@ -82,203 +82,203 @@ UnhandledEngine::findBreaks( UText *text,
     while((int32_t)utext_getNativeIndex(text) < endPos && fHandled->contains(c)) {
         utext_next32(text);            // TODO:  recast loop to work with post-increment operations.
         c = utext_current32(text);
-    } 
-    return 0; 
-} 
- 
-void 
+    }
+    return 0;
+}
+
+void
 UnhandledEngine::handleCharacter(UChar32 c) {
     if (fHandled == nullptr) {
         fHandled = new UnicodeSet();
         if (fHandled == nullptr) {
             return;
-        } 
-    } 
+        }
+    }
     if (!fHandled->contains(c)) {
         UErrorCode status = U_ZERO_ERROR;
         // Apply the entire script of the character.
         int32_t script = u_getIntPropertyValue(c, UCHAR_SCRIPT);
         fHandled->applyIntPropertyValue(UCHAR_SCRIPT, script, status);
     }
-} 
- 
-/* 
- ****************************************************************** 
- */ 
- 
-ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) { 
-    fEngines = 0; 
-} 
- 
-ICULanguageBreakFactory::~ICULanguageBreakFactory() { 
-    if (fEngines != 0) { 
-        delete fEngines; 
-    } 
-} 
- 
-U_NAMESPACE_END 
-U_CDECL_BEGIN 
-static void U_CALLCONV _deleteEngine(void *obj) { 
-    delete (const icu::LanguageBreakEngine *) obj; 
-} 
-U_CDECL_END 
-U_NAMESPACE_BEGIN 
- 
-const LanguageBreakEngine * 
+}
+
+/*
+ ******************************************************************
+ */
+
+ICULanguageBreakFactory::ICULanguageBreakFactory(UErrorCode &/*status*/) {
+    fEngines = 0;
+}
+
+ICULanguageBreakFactory::~ICULanguageBreakFactory() {
+    if (fEngines != 0) {
+        delete fEngines;
+    }
+}
+
+U_NAMESPACE_END
+U_CDECL_BEGIN
+static void U_CALLCONV _deleteEngine(void *obj) {
+    delete (const icu::LanguageBreakEngine *) obj;
+}
+U_CDECL_END
+U_NAMESPACE_BEGIN
+
+const LanguageBreakEngine *
 ICULanguageBreakFactory::getEngineFor(UChar32 c) {
-    const LanguageBreakEngine *lbe = NULL; 
-    UErrorCode  status = U_ZERO_ERROR; 
- 
+    const LanguageBreakEngine *lbe = NULL;
+    UErrorCode  status = U_ZERO_ERROR;
+
     static UMutex gBreakEngineMutex;
-    Mutex m(&gBreakEngineMutex); 
- 
-    if (fEngines == NULL) { 
-        UStack  *engines = new UStack(_deleteEngine, NULL, status); 
-        if (U_FAILURE(status) || engines == NULL) { 
-            // Note: no way to return error code to caller. 
-            delete engines; 
-            return NULL; 
-        } 
-        fEngines = engines; 
-    } else { 
-        int32_t i = fEngines->size(); 
-        while (--i >= 0) { 
-            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i)); 
+    Mutex m(&gBreakEngineMutex);
+
+    if (fEngines == NULL) {
+        UStack  *engines = new UStack(_deleteEngine, NULL, status);
+        if (U_FAILURE(status) || engines == NULL) {
+            // Note: no way to return error code to caller.
+            delete engines;
+            return NULL;
+        }
+        fEngines = engines;
+    } else {
+        int32_t i = fEngines->size();
+        while (--i >= 0) {
+            lbe = (const LanguageBreakEngine *)(fEngines->elementAt(i));
             if (lbe != NULL && lbe->handles(c)) {
-                return lbe; 
-            } 
-        } 
-    } 
-     
-    // We didn't find an engine. Create one. 
+                return lbe;
+            }
+        }
+    }
+    
+    // We didn't find an engine. Create one.
     lbe = loadEngineFor(c);
-    if (lbe != NULL) { 
-        fEngines->push((void *)lbe, status); 
-    } 
-    return lbe; 
-} 
- 
-const LanguageBreakEngine * 
+    if (lbe != NULL) {
+        fEngines->push((void *)lbe, status);
+    }
+    return lbe;
+}
+
+const LanguageBreakEngine *
 ICULanguageBreakFactory::loadEngineFor(UChar32 c) {
-    UErrorCode status = U_ZERO_ERROR; 
-    UScriptCode code = uscript_getScript(c, &status); 
-    if (U_SUCCESS(status)) { 
+    UErrorCode status = U_ZERO_ERROR;
+    UScriptCode code = uscript_getScript(c, &status);
+    if (U_SUCCESS(status)) {
         DictionaryMatcher *m = loadDictionaryMatcherFor(code);
-        if (m != NULL) { 
-            const LanguageBreakEngine *engine = NULL; 
-            switch(code) { 
-            case USCRIPT_THAI: 
-                engine = new ThaiBreakEngine(m, status); 
-                break; 
-            case USCRIPT_LAO: 
-                engine = new LaoBreakEngine(m, status); 
-                break; 
-            case USCRIPT_MYANMAR: 
-                engine = new BurmeseBreakEngine(m, status); 
-                break; 
-            case USCRIPT_KHMER: 
-                engine = new KhmerBreakEngine(m, status); 
-                break; 
- 
-#if !UCONFIG_NO_NORMALIZATION 
-                // CJK not available w/o normalization 
-            case USCRIPT_HANGUL: 
-                engine = new CjkBreakEngine(m, kKorean, status); 
-                break; 
- 
-            // use same BreakEngine and dictionary for both Chinese and Japanese 
-            case USCRIPT_HIRAGANA: 
-            case USCRIPT_KATAKANA: 
-            case USCRIPT_HAN: 
-                engine = new CjkBreakEngine(m, kChineseJapanese, status); 
-                break; 
-#if 0 
-            // TODO: Have to get some characters with script=common handled 
-            // by CjkBreakEngine (e.g. U+309B). Simply subjecting 
-            // them to CjkBreakEngine does not work. The engine has to 
-            // special-case them. 
-            case USCRIPT_COMMON: 
-            { 
-                UBlockCode block = ublock_getCode(code); 
-                if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA) 
-                   engine = new CjkBreakEngine(dict, kChineseJapanese, status); 
-                break; 
-            } 
-#endif 
-#endif 
- 
-            default: 
-                break; 
-            } 
-            if (engine == NULL) { 
-                delete m; 
-            } 
-            else if (U_FAILURE(status)) { 
-                delete engine; 
-                engine = NULL; 
-            } 
-            return engine; 
-        } 
-    } 
-    return NULL; 
-} 
- 
-DictionaryMatcher * 
+        if (m != NULL) {
+            const LanguageBreakEngine *engine = NULL;
+            switch(code) {
+            case USCRIPT_THAI:
+                engine = new ThaiBreakEngine(m, status);
+                break;
+            case USCRIPT_LAO:
+                engine = new LaoBreakEngine(m, status);
+                break;
+            case USCRIPT_MYANMAR:
+                engine = new BurmeseBreakEngine(m, status);
+                break;
+            case USCRIPT_KHMER:
+                engine = new KhmerBreakEngine(m, status);
+                break;
+
+#if !UCONFIG_NO_NORMALIZATION
+                // CJK not available w/o normalization
+            case USCRIPT_HANGUL:
+                engine = new CjkBreakEngine(m, kKorean, status);
+                break;
+
+            // use same BreakEngine and dictionary for both Chinese and Japanese
+            case USCRIPT_HIRAGANA:
+            case USCRIPT_KATAKANA:
+            case USCRIPT_HAN:
+                engine = new CjkBreakEngine(m, kChineseJapanese, status);
+                break;
+#if 0
+            // TODO: Have to get some characters with script=common handled
+            // by CjkBreakEngine (e.g. U+309B). Simply subjecting
+            // them to CjkBreakEngine does not work. The engine has to
+            // special-case them.
+            case USCRIPT_COMMON:
+            {
+                UBlockCode block = ublock_getCode(code);
+                if (block == UBLOCK_HIRAGANA || block == UBLOCK_KATAKANA)
+                   engine = new CjkBreakEngine(dict, kChineseJapanese, status);
+                break;
+            }
+#endif
+#endif
+
+            default:
+                break;
+            }
+            if (engine == NULL) {
+                delete m;
+            }
+            else if (U_FAILURE(status)) {
+                delete engine;
+                engine = NULL;
+            }
+            return engine;
+        }
+    }
+    return NULL;
+}
+
+DictionaryMatcher *
 ICULanguageBreakFactory::loadDictionaryMatcherFor(UScriptCode script) { 
-    UErrorCode status = U_ZERO_ERROR; 
-    // open root from brkitr tree. 
-    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status); 
-    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status); 
-    int32_t dictnlength = 0; 
-    const UChar *dictfname = 
-        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status); 
-    if (U_FAILURE(status)) { 
-        ures_close(b); 
-        return NULL; 
-    } 
-    CharString dictnbuf; 
-    CharString ext; 
-    const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot 
-    if (extStart != NULL) { 
-        int32_t len = (int32_t)(extStart - dictfname); 
-        ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status); 
-        dictnlength = len; 
-    } 
-    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status); 
-    ures_close(b); 
- 
-    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status); 
-    if (U_SUCCESS(status)) { 
-        // build trie 
-        const uint8_t *data = (const uint8_t *)udata_getMemory(file); 
-        const int32_t *indexes = (const int32_t *)data; 
-        const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; 
-        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; 
-        DictionaryMatcher *m = NULL; 
-        if (trieType == DictionaryData::TRIE_TYPE_BYTES) { 
-            const int32_t transform = indexes[DictionaryData::IX_TRANSFORM]; 
-            const char *characters = (const char *)(data + offset); 
-            m = new BytesDictionaryMatcher(characters, transform, file); 
-        } 
-        else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { 
-            const UChar *characters = (const UChar *)(data + offset); 
-            m = new UCharsDictionaryMatcher(characters, file); 
-        } 
-        if (m == NULL) { 
-            // no matcher exists to take ownership - either we are an invalid  
-            // type or memory allocation failed 
-            udata_close(file); 
-        } 
-        return m; 
-    } else if (dictfname != NULL) { 
-        // we don't have a dictionary matcher. 
-        // returning NULL here will cause us to fail to find a dictionary break engine, as expected 
-        status = U_ZERO_ERROR; 
-        return NULL; 
-    } 
-    return NULL; 
-} 
- 
-U_NAMESPACE_END 
- 
-#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 
+    UErrorCode status = U_ZERO_ERROR;
+    // open root from brkitr tree.
+    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, "", &status);
+    b = ures_getByKeyWithFallback(b, "dictionaries", b, &status);
+    int32_t dictnlength = 0;
+    const UChar *dictfname =
+        ures_getStringByKeyWithFallback(b, uscript_getShortName(script), &dictnlength, &status);
+    if (U_FAILURE(status)) {
+        ures_close(b);
+        return NULL;
+    }
+    CharString dictnbuf;
+    CharString ext;
+    const UChar *extStart = u_memrchr(dictfname, 0x002e, dictnlength);  // last dot
+    if (extStart != NULL) {
+        int32_t len = (int32_t)(extStart - dictfname);
+        ext.appendInvariantChars(UnicodeString(FALSE, extStart + 1, dictnlength - len - 1), status);
+        dictnlength = len;
+    }
+    dictnbuf.appendInvariantChars(UnicodeString(FALSE, dictfname, dictnlength), status);
+    ures_close(b);
+
+    UDataMemory *file = udata_open(U_ICUDATA_BRKITR, ext.data(), dictnbuf.data(), &status);
+    if (U_SUCCESS(status)) {
+        // build trie
+        const uint8_t *data = (const uint8_t *)udata_getMemory(file);
+        const int32_t *indexes = (const int32_t *)data;
+        const int32_t offset = indexes[DictionaryData::IX_STRING_TRIE_OFFSET];
+        const int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK;
+        DictionaryMatcher *m = NULL;
+        if (trieType == DictionaryData::TRIE_TYPE_BYTES) {
+            const int32_t transform = indexes[DictionaryData::IX_TRANSFORM];
+            const char *characters = (const char *)(data + offset);
+            m = new BytesDictionaryMatcher(characters, transform, file);
+        }
+        else if (trieType == DictionaryData::TRIE_TYPE_UCHARS) {
+            const UChar *characters = (const UChar *)(data + offset);
+            m = new UCharsDictionaryMatcher(characters, file);
+        }
+        if (m == NULL) {
+            // no matcher exists to take ownership - either we are an invalid 
+            // type or memory allocation failed
+            udata_close(file);
+        }
+        return m;
+    } else if (dictfname != NULL) {
+        // we don't have a dictionary matcher.
+        // returning NULL here will cause us to fail to find a dictionary break engine, as expected
+        status = U_ZERO_ERROR;
+        return NULL;
+    }
+    return NULL;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:33 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:33 +0300
commit	1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree	b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/brkeng.cpp
parent	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download	ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz