aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/i18n/brktrans.cpp
diff options
context:
space:
mode:
authorneksard <neksard@yandex-team.ru>2022-02-10 16:45:23 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:23 +0300
commit8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/brktrans.cpp
parentd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
downloadydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/brktrans.cpp')
-rw-r--r--contrib/libs/icu/i18n/brktrans.cpp374
1 files changed, 187 insertions, 187 deletions
diff --git a/contrib/libs/icu/i18n/brktrans.cpp b/contrib/libs/icu/i18n/brktrans.cpp
index 46b0e345da..e1449db1a8 100644
--- a/contrib/libs/icu/i18n/brktrans.cpp
+++ b/contrib/libs/icu/i18n/brktrans.cpp
@@ -1,195 +1,195 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2008-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-* Date Name Description
-* 05/11/2008 Andy Heninger Port from Java
-**********************************************************************
-*/
-
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2008-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 05/11/2008 Andy Heninger Port from Java
+**********************************************************************
+*/
+
#include <utility>
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
-
-#include "unicode/brkiter.h"
-#include "unicode/localpointer.h"
-#include "unicode/uchar.h"
-#include "unicode/unifilt.h"
-#include "unicode/uniset.h"
-
-#include "brktrans.h"
-#include "cmemory.h"
-#include "mutex.h"
-#include "uprops.h"
-#include "uinvchar.h"
-#include "util.h"
-#include "uvectr32.h"
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
-
-static const UChar SPACE = 32; // ' '
-
-
-/**
- * Constructs a transliterator with the default delimiters '{' and
- * '}'.
- */
-BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
- Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
- cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) {
- }
-
-
-/**
- * Destructor.
- */
-BreakTransliterator::~BreakTransliterator() {
-}
-
-/**
- * Copy constructor.
- */
-BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
- Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.fInsertion) {
-}
-
-
-/**
- * Transliterator API.
- */
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/localpointer.h"
+#include "unicode/uchar.h"
+#include "unicode/unifilt.h"
+#include "unicode/uniset.h"
+
+#include "brktrans.h"
+#include "cmemory.h"
+#include "mutex.h"
+#include "uprops.h"
+#include "uinvchar.h"
+#include "util.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator)
+
+static const UChar SPACE = 32; // ' '
+
+
+/**
+ * Constructs a transliterator whose insertion string defaults to a
+ * single space (U+0020).
+ */
+BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) :
+ Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter),
+ cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) {
+ }
+
+
+/**
+ * Destructor.
+ */
+BreakTransliterator::~BreakTransliterator() {
+}
+
+/**
+ * Copy constructor.
+ */
+BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) :
+ Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.fInsertion) {
+}
+
+
+/**
+ * Transliterator API.
+ */
BreakTransliterator* BreakTransliterator::clone() const {
- return new BreakTransliterator(*this);
-}
-
-/**
- * Implements {@link Transliterator#handleTransliterate}.
- */
-void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
- UBool isIncremental ) const {
-
- UErrorCode status = U_ZERO_ERROR;
- LocalPointer<BreakIterator> bi;
- LocalPointer<UVector32> boundaries;
-
- {
- Mutex m;
- BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
+ return new BreakTransliterator(*this);
+}
+
+/**
+ * Implements {@link Transliterator#handleTransliterate}.
+ */
+void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets,
+ UBool isIncremental ) const {
+
+ UErrorCode status = U_ZERO_ERROR;
+ LocalPointer<BreakIterator> bi;
+ LocalPointer<UVector32> boundaries;
+
+ {
+ Mutex m;
+ BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
boundaries = std::move(nonConstThis->cachedBoundaries);
bi = std::move(nonConstThis->cachedBI);
- }
- if (bi.isNull()) {
- bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
- }
- if (boundaries.isNull()) {
- boundaries.adoptInstead(new UVector32(status));
- }
-
- if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
- return;
- }
-
- boundaries->removeAllElements();
- UnicodeString sText = replaceableAsString(text);
- bi->setText(sText);
- bi->preceding(offsets.start);
-
- // To make things much easier, we will stack the boundaries, and then insert at the end.
- // generally, we won't need too many, since we will be filtered.
-
- int32_t boundary;
- for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
- if (boundary == 0) continue;
- // HACK: Check to see that preceeding item was a letter
-
- UChar32 cp = sText.char32At(boundary-1);
- int type = u_charType(cp);
- //System.out.println(Integer.toString(cp,16) + " (before): " + type);
- if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
-
- cp = sText.char32At(boundary);
- type = u_charType(cp);
- //System.out.println(Integer.toString(cp,16) + " (after): " + type);
- if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
-
- boundaries->addElement(boundary, status);
- // printf("Boundary at %d\n", boundary);
- }
-
- int delta = 0;
- int lastBoundary = 0;
-
- if (boundaries->size() != 0) { // if we found something, adjust
- delta = boundaries->size() * fInsertion.length();
- lastBoundary = boundaries->lastElementi();
-
- // we do this from the end backwards, so that we don't have to keep updating.
-
- while (boundaries->size() > 0) {
- boundary = boundaries->popi();
- text.handleReplaceBetween(boundary, boundary, fInsertion);
- }
- }
-
- // Now fix up the return values
- offsets.contextLimit += delta;
- offsets.limit += delta;
- offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
-
- // Return break iterator & boundaries vector to the cache.
- {
- Mutex m;
- BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
- if (nonConstThis->cachedBI.isNull()) {
+ }
+ if (bi.isNull()) {
+ bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status));
+ }
+ if (boundaries.isNull()) {
+ boundaries.adoptInstead(new UVector32(status));
+ }
+
+ if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) {
+ return;
+ }
+
+ boundaries->removeAllElements();
+ UnicodeString sText = replaceableAsString(text);
+ bi->setText(sText);
+ bi->preceding(offsets.start);
+
+ // To make things much easier, we will stack the boundaries, and then insert at the end.
+ // generally, we won't need too many, since we will be filtered.
+
+ int32_t boundary;
+ for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) {
+ if (boundary == 0) continue;
+ // HACK: Check that the preceding character is a letter or a mark
+
+ UChar32 cp = sText.char32At(boundary-1);
+ int type = u_charType(cp);
+ //System.out.println(Integer.toString(cp,16) + " (before): " + type);
+ if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
+
+ cp = sText.char32At(boundary);
+ type = u_charType(cp);
+ //System.out.println(Integer.toString(cp,16) + " (after): " + type);
+ if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue;
+
+ boundaries->addElement(boundary, status);
+ // printf("Boundary at %d\n", boundary);
+ }
+
+ int delta = 0;
+ int lastBoundary = 0;
+
+ if (boundaries->size() != 0) { // if we found something, adjust
+ delta = boundaries->size() * fInsertion.length();
+ lastBoundary = boundaries->lastElementi();
+
+ // Insert from the last boundary backwards, so that the earlier boundary offsets stay valid.
+
+ while (boundaries->size() > 0) {
+ boundary = boundaries->popi();
+ text.handleReplaceBetween(boundary, boundary, fInsertion);
+ }
+ }
+
+ // Now fix up the return values
+ offsets.contextLimit += delta;
+ offsets.limit += delta;
+ offsets.start = isIncremental ? lastBoundary + delta : offsets.limit;
+
+ // Return break iterator & boundaries vector to the cache.
+ {
+ Mutex m;
+ BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this);
+ if (nonConstThis->cachedBI.isNull()) {
nonConstThis->cachedBI = std::move(bi);
- }
- if (nonConstThis->cachedBoundaries.isNull()) {
+ }
+ if (nonConstThis->cachedBoundaries.isNull()) {
nonConstThis->cachedBoundaries = std::move(boundaries);
- }
- }
-
- // TODO: do something with U_FAILURE(status);
- // (need to look at transliterators overall, not just here.)
-}
-
-//
-// getInsertion()
-//
-const UnicodeString &BreakTransliterator::getInsertion() const {
- return fInsertion;
-}
-
-//
-// setInsertion()
-//
-void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
- this->fInsertion = insertion;
-}
-
-//
-// replaceableAsString Hack to let break iterators work
-// on the replaceable text from transliterators.
-// In practice, the only real Replaceable type that we
-// will be seeing is UnicodeString, so this function
-// will normally be efficient.
-//
-UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
- UnicodeString s;
- UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
- if (rs != NULL) {
- s = *rs;
- } else {
- r.extractBetween(0, r.length(), s);
- }
- return s;
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+ }
+ }
+
+ // TODO: do something with U_FAILURE(status);
+ // (need to look at transliterators overall, not just here.)
+}
+
+//
+// getInsertion()
+//
+const UnicodeString &BreakTransliterator::getInsertion() const {
+ return fInsertion;
+}
+
+//
+// setInsertion()
+//
+void BreakTransliterator::setInsertion(const UnicodeString &insertion) {
+ this->fInsertion = insertion;
+}
+
+//
+// replaceableAsString Hack to let break iterators work
+// on the replaceable text from transliterators.
+// In practice, the only real Replaceable type that we
+// will be seeing is UnicodeString, so this function
+// will normally be efficient.
+//
+UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) {
+ UnicodeString s;
+ UnicodeString *rs = dynamic_cast<UnicodeString *>(&r);
+ if (rs != NULL) {
+ s = *rs;
+ } else {
+ r.extractBetween(0, r.length(), s);
+ }
+ return s;
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION */