diff options
author | neksard <neksard@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:23 +0300 |
commit | 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch) | |
tree | 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/brktrans.cpp | |
parent | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff) | |
download | ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz |
Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/i18n/brktrans.cpp')
-rw-r--r-- | contrib/libs/icu/i18n/brktrans.cpp | 374 |
1 files changed, 187 insertions, 187 deletions
diff --git a/contrib/libs/icu/i18n/brktrans.cpp b/contrib/libs/icu/i18n/brktrans.cpp index 46b0e345da..e1449db1a8 100644 --- a/contrib/libs/icu/i18n/brktrans.cpp +++ b/contrib/libs/icu/i18n/brktrans.cpp @@ -1,195 +1,195 @@ // © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2008-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 05/11/2008 Andy Heninger Port from Java -********************************************************************** -*/ - +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2008-2015, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 05/11/2008 Andy Heninger Port from Java +********************************************************************** +*/ + #include <utility> -#include "unicode/utypes.h" - -#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION - -#include "unicode/brkiter.h" -#include "unicode/localpointer.h" -#include "unicode/uchar.h" -#include "unicode/unifilt.h" -#include "unicode/uniset.h" - -#include "brktrans.h" -#include "cmemory.h" -#include "mutex.h" -#include "uprops.h" -#include "uinvchar.h" -#include "util.h" -#include "uvectr32.h" - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) - -static const UChar SPACE = 32; // ' ' - - -/** - * Constructs a transliterator with the default delimiters '{' and - * '}'. - */ -BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : - Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), - cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) { - } - - -/** - * Destructor. - */ -BreakTransliterator::~BreakTransliterator() { -} - -/** - * Copy constructor. - */ -BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : - Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.fInsertion) { -} - - -/** - * Transliterator API. - */ +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION && !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/brkiter.h" +#include "unicode/localpointer.h" +#include "unicode/uchar.h" +#include "unicode/unifilt.h" +#include "unicode/uniset.h" + +#include "brktrans.h" +#include "cmemory.h" +#include "mutex.h" +#include "uprops.h" +#include "uinvchar.h" +#include "util.h" +#include "uvectr32.h" + +U_NAMESPACE_BEGIN + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(BreakTransliterator) + +static const UChar SPACE = 32; // ' ' + + +/** + * Constructs a transliterator with the default delimiters '{' and + * '}'. + */ +BreakTransliterator::BreakTransliterator(UnicodeFilter* adoptedFilter) : + Transliterator(UNICODE_STRING("Any-BreakInternal", 17), adoptedFilter), + cachedBI(NULL), cachedBoundaries(NULL), fInsertion(SPACE) { + } + + +/** + * Destructor. + */ +BreakTransliterator::~BreakTransliterator() { +} + +/** + * Copy constructor. + */ +BreakTransliterator::BreakTransliterator(const BreakTransliterator& o) : + Transliterator(o), cachedBI(NULL), cachedBoundaries(NULL), fInsertion(o.fInsertion) { +} + + +/** + * Transliterator API. + */ BreakTransliterator* BreakTransliterator::clone() const { - return new BreakTransliterator(*this); -} - -/** - * Implements {@link Transliterator#handleTransliterate}. - */ -void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, - UBool isIncremental ) const { - - UErrorCode status = U_ZERO_ERROR; - LocalPointer<BreakIterator> bi; - LocalPointer<UVector32> boundaries; - - { - Mutex m; - BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); + return new BreakTransliterator(*this); +} + +/** + * Implements {@link Transliterator#handleTransliterate}. + */ +void BreakTransliterator::handleTransliterate(Replaceable& text, UTransPosition& offsets, + UBool isIncremental ) const { + + UErrorCode status = U_ZERO_ERROR; + LocalPointer<BreakIterator> bi; + LocalPointer<UVector32> boundaries; + + { + Mutex m; + BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); boundaries = std::move(nonConstThis->cachedBoundaries); bi = std::move(nonConstThis->cachedBI); - } - if (bi.isNull()) { - bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status)); - } - if (boundaries.isNull()) { - boundaries.adoptInstead(new UVector32(status)); - } - - if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) { - return; - } - - boundaries->removeAllElements(); - UnicodeString sText = replaceableAsString(text); - bi->setText(sText); - bi->preceding(offsets.start); - - // To make things much easier, we will stack the boundaries, and then insert at the end. - // generally, we won't need too many, since we will be filtered. - - int32_t boundary; - for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { - if (boundary == 0) continue; - // HACK: Check to see that preceeding item was a letter - - UChar32 cp = sText.char32At(boundary-1); - int type = u_charType(cp); - //System.out.println(Integer.toString(cp,16) + " (before): " + type); - if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; - - cp = sText.char32At(boundary); - type = u_charType(cp); - //System.out.println(Integer.toString(cp,16) + " (after): " + type); - if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; - - boundaries->addElement(boundary, status); - // printf("Boundary at %d\n", boundary); - } - - int delta = 0; - int lastBoundary = 0; - - if (boundaries->size() != 0) { // if we found something, adjust - delta = boundaries->size() * fInsertion.length(); - lastBoundary = boundaries->lastElementi(); - - // we do this from the end backwards, so that we don't have to keep updating. - - while (boundaries->size() > 0) { - boundary = boundaries->popi(); - text.handleReplaceBetween(boundary, boundary, fInsertion); - } - } - - // Now fix up the return values - offsets.contextLimit += delta; - offsets.limit += delta; - offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; - - // Return break iterator & boundaries vector to the cache. - { - Mutex m; - BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); - if (nonConstThis->cachedBI.isNull()) { + } + if (bi.isNull()) { + bi.adoptInstead(BreakIterator::createWordInstance(Locale::getEnglish(), status)); + } + if (boundaries.isNull()) { + boundaries.adoptInstead(new UVector32(status)); + } + + if (bi.isNull() || boundaries.isNull() || U_FAILURE(status)) { + return; + } + + boundaries->removeAllElements(); + UnicodeString sText = replaceableAsString(text); + bi->setText(sText); + bi->preceding(offsets.start); + + // To make things much easier, we will stack the boundaries, and then insert at the end. + // generally, we won't need too many, since we will be filtered. + + int32_t boundary; + for(boundary = bi->next(); boundary != UBRK_DONE && boundary < offsets.limit; boundary = bi->next()) { + if (boundary == 0) continue; + // HACK: Check to see that preceeding item was a letter + + UChar32 cp = sText.char32At(boundary-1); + int type = u_charType(cp); + //System.out.println(Integer.toString(cp,16) + " (before): " + type); + if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; + + cp = sText.char32At(boundary); + type = u_charType(cp); + //System.out.println(Integer.toString(cp,16) + " (after): " + type); + if ((U_MASK(type) & (U_GC_L_MASK | U_GC_M_MASK)) == 0) continue; + + boundaries->addElement(boundary, status); + // printf("Boundary at %d\n", boundary); + } + + int delta = 0; + int lastBoundary = 0; + + if (boundaries->size() != 0) { // if we found something, adjust + delta = boundaries->size() * fInsertion.length(); + lastBoundary = boundaries->lastElementi(); + + // we do this from the end backwards, so that we don't have to keep updating. + + while (boundaries->size() > 0) { + boundary = boundaries->popi(); + text.handleReplaceBetween(boundary, boundary, fInsertion); + } + } + + // Now fix up the return values + offsets.contextLimit += delta; + offsets.limit += delta; + offsets.start = isIncremental ? lastBoundary + delta : offsets.limit; + + // Return break iterator & boundaries vector to the cache. + { + Mutex m; + BreakTransliterator *nonConstThis = const_cast<BreakTransliterator *>(this); + if (nonConstThis->cachedBI.isNull()) { nonConstThis->cachedBI = std::move(bi); - } - if (nonConstThis->cachedBoundaries.isNull()) { + } + if (nonConstThis->cachedBoundaries.isNull()) { nonConstThis->cachedBoundaries = std::move(boundaries); - } - } - - // TODO: do something with U_FAILURE(status); - // (need to look at transliterators overall, not just here.) -} - -// -// getInsertion() -// -const UnicodeString &BreakTransliterator::getInsertion() const { - return fInsertion; -} - -// -// setInsertion() -// -void BreakTransliterator::setInsertion(const UnicodeString &insertion) { - this->fInsertion = insertion; -} - -// -// replaceableAsString Hack to let break iterators work -// on the replaceable text from transliterators. -// In practice, the only real Replaceable type that we -// will be seeing is UnicodeString, so this function -// will normally be efficient. -// -UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { - UnicodeString s; - UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); - if (rs != NULL) { - s = *rs; - } else { - r.extractBetween(0, r.length(), s); - } - return s; -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + } + } + + // TODO: do something with U_FAILURE(status); + // (need to look at transliterators overall, not just here.) +} + +// +// getInsertion() +// +const UnicodeString &BreakTransliterator::getInsertion() const { + return fInsertion; +} + +// +// setInsertion() +// +void BreakTransliterator::setInsertion(const UnicodeString &insertion) { + this->fInsertion = insertion; +} + +// +// replaceableAsString Hack to let break iterators work +// on the replaceable text from transliterators. +// In practice, the only real Replaceable type that we +// will be seeing is UnicodeString, so this function +// will normally be efficient. +// +UnicodeString BreakTransliterator::replaceableAsString(Replaceable &r) { + UnicodeString s; + UnicodeString *rs = dynamic_cast<UnicodeString *>(&r); + if (rs != NULL) { + s = *rs; + } else { + r.extractBetween(0, r.length(), s); + } + return s; +} + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ |