aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/nortrans.cpp
diff options
context:
space:
mode:
authorneksard <neksard@yandex-team.ru>2022-02-10 16:45:23 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:23 +0300
commit8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/nortrans.cpp
parentd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
downloadydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/nortrans.cpp')
-rw-r--r--contrib/libs/icu/i18n/nortrans.cpp352
1 files changed, 176 insertions, 176 deletions
diff --git a/contrib/libs/icu/i18n/nortrans.cpp b/contrib/libs/icu/i18n/nortrans.cpp
index 6a8d2c7419..32d8186b3b 100644
--- a/contrib/libs/icu/i18n/nortrans.cpp
+++ b/contrib/libs/icu/i18n/nortrans.cpp
@@ -1,178 +1,178 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2001-2011, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 07/03/01 aliu Creation.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_TRANSLITERATION
-
-#include "unicode/normalizer2.h"
-#include "unicode/utf16.h"
-#include "cstring.h"
-#include "nortrans.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)
-
-static inline Transliterator::Token cstrToken(const char *s) {
- return Transliterator::pointerToken((void *)s);
-}
-
-/**
- * System registration hook.
- */
-void NormalizationTransliterator::registerIDs() {
- // In the Token, the byte after the NUL is the UNormalization2Mode.
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
- _create, cstrToken("nfc\0\0"));
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
- _create, cstrToken("nfkc\0\0"));
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
- _create, cstrToken("nfc\0\1"));
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
- _create, cstrToken("nfkc\0\1"));
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
- _create, cstrToken("nfc\0\2"));
- Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
- _create, cstrToken("nfc\0\3"));
- Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
- UNICODE_STRING_SIMPLE("NFD"), TRUE);
- Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
- UNICODE_STRING_SIMPLE("NFKD"), TRUE);
- Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
- UNICODE_STRING_SIMPLE("NFD"), FALSE);
- Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
- UNICODE_STRING_SIMPLE("FCD"), FALSE);
-}
-
-/**
- * Factory methods
- */
-Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
- Token context) {
- const char *name = (const char *)context.pointer;
- UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
- UErrorCode errorCode = U_ZERO_ERROR;
- const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
- if(U_SUCCESS(errorCode)) {
- return new NormalizationTransliterator(ID, *norm2);
- } else {
- return NULL;
- }
-}
-
-/**
- * Constructs a transliterator.
- */
-NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,
- const Normalizer2 &norm2) :
- Transliterator(id, 0), fNorm2(norm2) {}
-
-/**
- * Destructor.
- */
-NormalizationTransliterator::~NormalizationTransliterator() {
-}
-
-/**
- * Copy constructor.
- */
-NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
- Transliterator(o), fNorm2(o.fNorm2) {}
-
-/**
- * Transliterator API.
- */
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2001-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 07/03/01 aliu Creation.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/normalizer2.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "nortrans.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NormalizationTransliterator)
+
+static inline Transliterator::Token cstrToken(const char *s) {
+ return Transliterator::pointerToken((void *)s);
+}
+
+/**
+ * System registration hook.
+ */
+void NormalizationTransliterator::registerIDs() {
+ // In the Token, the byte after the NUL is the UNormalization2Mode.
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFC"),
+ _create, cstrToken("nfc\0\0"));
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKC"),
+ _create, cstrToken("nfkc\0\0"));
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFD"),
+ _create, cstrToken("nfc\0\1"));
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-NFKD"),
+ _create, cstrToken("nfkc\0\1"));
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCD"),
+ _create, cstrToken("nfc\0\2"));
+ Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-FCC"),
+ _create, cstrToken("nfc\0\3"));
+ Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFC"),
+ UNICODE_STRING_SIMPLE("NFD"), TRUE);
+ Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("NFKC"),
+ UNICODE_STRING_SIMPLE("NFKD"), TRUE);
+ Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCC"),
+ UNICODE_STRING_SIMPLE("NFD"), FALSE);
+ Transliterator::_registerSpecialInverse(UNICODE_STRING_SIMPLE("FCD"),
+ UNICODE_STRING_SIMPLE("FCD"), FALSE);
+}
+
+/**
+ * Factory methods
+ */
+Transliterator* NormalizationTransliterator::_create(const UnicodeString& ID,
+ Token context) {
+ const char *name = (const char *)context.pointer;
+ UNormalization2Mode mode = (UNormalization2Mode)uprv_strchr(name, 0)[1];
+ UErrorCode errorCode = U_ZERO_ERROR;
+ const Normalizer2 *norm2 = Normalizer2::getInstance(NULL, name, mode, errorCode);
+ if(U_SUCCESS(errorCode)) {
+ return new NormalizationTransliterator(ID, *norm2);
+ } else {
+ return NULL;
+ }
+}
+
+/**
+ * Constructs a transliterator.
+ */
+NormalizationTransliterator::NormalizationTransliterator(const UnicodeString& id,
+ const Normalizer2 &norm2) :
+ Transliterator(id, 0), fNorm2(norm2) {}
+
+/**
+ * Destructor.
+ */
+NormalizationTransliterator::~NormalizationTransliterator() {
+}
+
+/**
+ * Copy constructor.
+ */
+NormalizationTransliterator::NormalizationTransliterator(const NormalizationTransliterator& o) :
+ Transliterator(o), fNorm2(o.fNorm2) {}
+
+/**
+ * Transliterator API.
+ */
NormalizationTransliterator* NormalizationTransliterator::clone() const {
- return new NormalizationTransliterator(*this);
-}
-
-/**
- * Implements {@link Transliterator#handleTransliterate}.
- */
-void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
- UBool isIncremental) const {
- // start and limit of the input range
- int32_t start = offsets.start;
- int32_t limit = offsets.limit;
- if(start >= limit) {
- return;
- }
-
- /*
- * Normalize as short chunks at a time as possible even in
- * bulk mode, so that styled text is minimally disrupted.
- * In incremental mode, a chunk that ends with offsets.limit
- * must not be normalized.
- *
- * If it was known that the input text is not styled, then
- * a bulk mode normalization could look like this:
-
- UnicodeString input, normalized;
- int32_t length = limit - start;
- _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
- input.releaseBuffer(length);
-
- UErrorCode status = U_ZERO_ERROR;
- fNorm2.normalize(input, normalized, status);
-
- text.handleReplaceBetween(start, limit, normalized);
-
- int32_t delta = normalized.length() - length;
- offsets.contextLimit += delta;
- offsets.limit += delta;
- offsets.start = limit + delta;
-
- */
- UErrorCode errorCode = U_ZERO_ERROR;
- UnicodeString segment;
- UnicodeString normalized;
- UChar32 c = text.char32At(start);
- do {
- int32_t prev = start;
- // Skip at least one character so we make progress.
- // c holds the character at start.
- segment.remove();
- do {
- segment.append(c);
- start += U16_LENGTH(c);
- } while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start)));
- if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
- // stop in incremental mode when we reach the input limit
- // in case there are additional characters that could change the
- // normalization result
- start=prev;
- break;
- }
- fNorm2.normalize(segment, normalized, errorCode);
- if(U_FAILURE(errorCode)) {
- break;
- }
- if(segment != normalized) {
- // replace the input chunk with its normalized form
- text.handleReplaceBetween(prev, start, normalized);
-
- // update all necessary indexes accordingly
- int32_t delta = normalized.length() - (start - prev);
- start += delta;
- limit += delta;
- }
- } while(start < limit);
-
- offsets.start = start;
- offsets.contextLimit += limit - offsets.limit;
- offsets.limit = limit;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+ return new NormalizationTransliterator(*this);
+}
+
+/**
+ * Implements {@link Transliterator#handleTransliterate}.
+ */
+void NormalizationTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
+ UBool isIncremental) const {
+ // start and limit of the input range
+ int32_t start = offsets.start;
+ int32_t limit = offsets.limit;
+ if(start >= limit) {
+ return;
+ }
+
+ /*
+ * Normalize as short chunks at a time as possible even in
+ * bulk mode, so that styled text is minimally disrupted.
+ * In incremental mode, a chunk that ends with offsets.limit
+ * must not be normalized.
+ *
+ * If it was known that the input text is not styled, then
+ * a bulk mode normalization could look like this:
+
+ UnicodeString input, normalized;
+ int32_t length = limit - start;
+ _Replaceable_extractBetween(text, start, limit, input.getBuffer(length));
+ input.releaseBuffer(length);
+
+ UErrorCode status = U_ZERO_ERROR;
+ fNorm2.normalize(input, normalized, status);
+
+ text.handleReplaceBetween(start, limit, normalized);
+
+ int32_t delta = normalized.length() - length;
+ offsets.contextLimit += delta;
+ offsets.limit += delta;
+ offsets.start = limit + delta;
+
+ */
+ UErrorCode errorCode = U_ZERO_ERROR;
+ UnicodeString segment;
+ UnicodeString normalized;
+ UChar32 c = text.char32At(start);
+ do {
+ int32_t prev = start;
+ // Skip at least one character so we make progress.
+ // c holds the character at start.
+ segment.remove();
+ do {
+ segment.append(c);
+ start += U16_LENGTH(c);
+ } while(start < limit && !fNorm2.hasBoundaryBefore(c = text.char32At(start)));
+ if(start == limit && isIncremental && !fNorm2.hasBoundaryAfter(c)) {
+ // stop in incremental mode when we reach the input limit
+ // in case there are additional characters that could change the
+ // normalization result
+ start=prev;
+ break;
+ }
+ fNorm2.normalize(segment, normalized, errorCode);
+ if(U_FAILURE(errorCode)) {
+ break;
+ }
+ if(segment != normalized) {
+ // replace the input chunk with its normalized form
+ text.handleReplaceBetween(prev, start, normalized);
+
+ // update all necessary indexes accordingly
+ int32_t delta = normalized.length() - (start - prev);
+ start += delta;
+ limit += delta;
+ }
+ } while(start < limit);
+
+ offsets.start = start;
+ offsets.contextLimit += limit - offsets.limit;
+ offsets.limit = limit;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */