Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 1 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:23 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:23 +0300
commit: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree: 83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/coleitr.cpp
parent: d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download: ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz
1 files changed, 469 insertions, 469 deletions
diff --git a/contrib/libs/icu/i18n/coleitr.cpp b/contrib/libs/icu/i18n/coleitr.cpp
index 64d3ab4d2b..912ae63afc 100644
--- a/contrib/libs/icu/i18n/coleitr.cpp
+++ b/contrib/libs/icu/i18n/coleitr.cpp
@@ -1,473 +1,473 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-*******************************************************************************
-* Copyright (C) 1996-2014, International Business Machines Corporation and
-* others. All Rights Reserved.
-*******************************************************************************
-*/
-
-/*
-* File coleitr.cpp
-*
-* Created by: Helena Shih
-*
-* Modification History:
-*
-*  Date      Name        Description
-*
-*  6/23/97   helena      Adding comments to make code more readable.
-* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
-* 12/10/99   aliu        Ported Thai collation support from Java.
-* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
-* 02/19/01   swquek      Removed CollationElementIterator() since it is 
-*                        private constructor and no calls are made to it
-* 2012-2014  markus      Rewritten in C++ again.
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_COLLATION
-
+// License & terms of use: http://www.unicode.org/copyright.html 
+/* 
+******************************************************************************* 
+* Copyright (C) 1996-2014, International Business Machines Corporation and 
+* others. All Rights Reserved. 
+******************************************************************************* 
+*/ 
+ 
+/* 
+* File coleitr.cpp 
+* 
+* Created by: Helena Shih 
+* 
+* Modification History: 
+* 
+*  Date      Name        Description 
+* 
+*  6/23/97   helena      Adding comments to make code more readable. 
+* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java 
+* 12/10/99   aliu        Ported Thai collation support from Java. 
+* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h) 
+* 02/19/01   swquek      Removed CollationElementIterator() since it is  
+*                        private constructor and no calls are made to it 
+* 2012-2014  markus      Rewritten in C++ again. 
+*/ 
+ 
+#include "unicode/utypes.h" 
+ 
+#if !UCONFIG_NO_COLLATION 
+ 
 #include "unicode/chariter.h"
-#include "unicode/coleitr.h"
-#include "unicode/tblcoll.h"
-#include "unicode/ustring.h"
-#include "cmemory.h"
-#include "collation.h"
-#include "collationdata.h"
-#include "collationiterator.h"
-#include "collationsets.h"
-#include "collationtailoring.h"
-#include "uassert.h"
-#include "uhash.h"
-#include "utf16collationiterator.h"
-#include "uvectr32.h"
-
-/* Constants --------------------------------------------------------------- */
-
-U_NAMESPACE_BEGIN
-
-UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
-
-/* CollationElementIterator public constructor/destructor ------------------ */
-
-CollationElementIterator::CollationElementIterator(
-                                         const CollationElementIterator& other) 
-        : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) {
-    *this = other;
-}
-
-CollationElementIterator::~CollationElementIterator()
-{
-    delete iter_;
-    delete offsets_;
-}
-
-/* CollationElementIterator public methods --------------------------------- */
-
-namespace {
-
-uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {
-    return (p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff);
-}
-uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {
-    return (p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f);
-}
-UBool ceNeedsTwoParts(int64_t ce) {
-    return (ce & INT64_C(0xffff00ff003f)) != 0;
-}
-
-}  // namespace
-
-int32_t CollationElementIterator::getOffset() const
-{
-    if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {
-        // CollationIterator::previousCE() decrements the CEs length
-        // while it pops CEs from its internal buffer.
-        int32_t i = iter_->getCEsLength();
-        if (otherHalf_ != 0) {
-            // Return the trailing CE offset while we are in the middle of a 64-bit CE.
-            ++i;
-        }
-        U_ASSERT(i < offsets_->size());
-        return offsets_->elementAti(i);
-    }
-    return iter_->getOffset();
-}
-
-/**
-* Get the ordering priority of the next character in the string.
-* @return the next character's ordering. Returns NULLORDER if an error has 
-*         occured or if the end of string has been reached
-*/
-int32_t CollationElementIterator::next(UErrorCode& status)
-{
-    if (U_FAILURE(status)) { return NULLORDER; }
-    if (dir_ > 1) {
-        // Continue forward iteration. Test this first.
-        if (otherHalf_ != 0) {
-            uint32_t oh = otherHalf_;
-            otherHalf_ = 0;
-            return oh;
-        }
-    } else if (dir_ == 1) {
-        // next() after setOffset()
-        dir_ = 2;
-    } else if (dir_ == 0) {
-        // The iter_ is already reset to the start of the text.
-        dir_ = 2;
-    } else /* dir_ < 0 */ {
-        // illegal change of direction
-        status = U_INVALID_STATE_ERROR;
-        return NULLORDER;
-    }
-    // No need to keep all CEs in the buffer when we iterate.
-    iter_->clearCEsIfNoneRemaining();
-    int64_t ce = iter_->nextCE(status);
-    if (ce == Collation::NO_CE) { return NULLORDER; }
-    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
-    uint32_t p = (uint32_t)(ce >> 32);
-    uint32_t lower32 = (uint32_t)ce;
-    uint32_t firstHalf = getFirstHalf(p, lower32);
-    uint32_t secondHalf = getSecondHalf(p, lower32);
-    if (secondHalf != 0) {
-        otherHalf_ = secondHalf | 0xc0;  // continuation CE
-    }
-    return firstHalf;
-}
-
-UBool CollationElementIterator::operator!=(
-                                  const CollationElementIterator& other) const
-{
-    return !(*this == other);
-}
-
-UBool CollationElementIterator::operator==(
-                                    const CollationElementIterator& that) const
-{
-    if (this == &that) {
-        return TRUE;
-    }
-
-    return
-        (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) &&
-        otherHalf_ == that.otherHalf_ &&
-        normalizeDir() == that.normalizeDir() &&
-        string_ == that.string_ &&
-        *iter_ == *that.iter_;
-}
-
-/**
-* Get the ordering priority of the previous collation element in the string.
-* @param status the error code status.
-* @return the previous element's ordering. Returns NULLORDER if an error has 
-*         occured or if the start of string has been reached.
-*/
-int32_t CollationElementIterator::previous(UErrorCode& status)
-{
-    if (U_FAILURE(status)) { return NULLORDER; }
-    if (dir_ < 0) {
-        // Continue backwards iteration. Test this first.
-        if (otherHalf_ != 0) {
-            uint32_t oh = otherHalf_;
-            otherHalf_ = 0;
-            return oh;
-        }
-    } else if (dir_ == 0) {
-        iter_->resetToOffset(string_.length());
-        dir_ = -1;
-    } else if (dir_ == 1) {
-        // previous() after setOffset()
-        dir_ = -1;
-    } else /* dir_ > 1 */ {
-        // illegal change of direction
-        status = U_INVALID_STATE_ERROR;
-        return NULLORDER;
-    }
-    if (offsets_ == NULL) {
-        offsets_ = new UVector32(status);
-        if (offsets_ == NULL) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return NULLORDER;
-        }
-    }
-    // If we already have expansion CEs, then we also have offsets.
-    // Otherwise remember the trailing offset in case we need to
-    // write offsets for an artificial expansion.
-    int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;
-    int64_t ce = iter_->previousCE(*offsets_, status);
-    if (ce == Collation::NO_CE) { return NULLORDER; }
-    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
-    uint32_t p = (uint32_t)(ce >> 32);
-    uint32_t lower32 = (uint32_t)ce;
-    uint32_t firstHalf = getFirstHalf(p, lower32);
-    uint32_t secondHalf = getSecondHalf(p, lower32);
-    if (secondHalf != 0) {
-        if (offsets_->isEmpty()) {
-            // When we convert a single 64-bit CE into two 32-bit CEs,
-            // we need to make this artificial expansion behave like a normal expansion.
-            // See CollationIterator::previousCE().
-            offsets_->addElement(iter_->getOffset(), status);
-            offsets_->addElement(limitOffset, status);
-        }
-        otherHalf_ = firstHalf;
-        return secondHalf | 0xc0;  // continuation CE
-    }
-    return firstHalf;
-}
-
-/**
-* Resets the cursor to the beginning of the string.
-*/
-void CollationElementIterator::reset()
-{
-    iter_ ->resetToOffset(0);
-    otherHalf_ = 0;
-    dir_ = 0;
-}
-
-void CollationElementIterator::setOffset(int32_t newOffset, 
-                                         UErrorCode& status)
-{
-    if (U_FAILURE(status)) { return; }
-    if (0 < newOffset && newOffset < string_.length()) {
-        int32_t offset = newOffset;
-        do {
-            UChar c = string_.charAt(offset);
-            if (!rbc_->isUnsafe(c) ||
-                    (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {
-                break;
-            }
-            // Back up to before this unsafe character.
-            --offset;
-        } while (offset > 0);
-        if (offset < newOffset) {
-            // We might have backed up more than necessary.
-            // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
-            // but for text "chu" setOffset(2) should remain at 2
-            // although we initially back up to offset 0.
-            // Find the last safe offset no greater than newOffset by iterating forward.
-            int32_t lastSafeOffset = offset;
-            do {
-                iter_->resetToOffset(lastSafeOffset);
-                do {
-                    iter_->nextCE(status);
-                    if (U_FAILURE(status)) { return; }
-                } while ((offset = iter_->getOffset()) == lastSafeOffset);
-                if (offset <= newOffset) {
-                    lastSafeOffset = offset;
-                }
-            } while (offset < newOffset);
-            newOffset = lastSafeOffset;
-        }
-    }
-    iter_->resetToOffset(newOffset);
-    otherHalf_ = 0;
-    dir_ = 1;
-}
-
-/**
-* Sets the source to the new source string.
-*/
-void CollationElementIterator::setText(const UnicodeString& source,
-                                       UErrorCode& status)
-{
-    if (U_FAILURE(status)) {
-        return;
-    }
-
-    string_ = source;
-    const UChar *s = string_.getBuffer();
-    CollationIterator *newIter;
-    UBool numeric = rbc_->settings->isNumeric();
-    if (rbc_->settings->dontCheckFCD()) {
-        newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
-    } else {
-        newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
-    }
-    if (newIter == NULL) {
-        status = U_MEMORY_ALLOCATION_ERROR;
-        return;
-    }
-    delete iter_;
-    iter_ = newIter;
-    otherHalf_ = 0;
-    dir_ = 0;
-}
-
-// Sets the source to the new character iterator.
-void CollationElementIterator::setText(CharacterIterator& source, 
-                                       UErrorCode& status)
-{
-    if (U_FAILURE(status)) 
-        return;
-
-    source.getText(string_);
-    setText(string_, status);
-}
-
-int32_t CollationElementIterator::strengthOrder(int32_t order) const
-{
-    UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();
-    // Mask off the unwanted differences.
-    if (s == UCOL_PRIMARY) {
-        order &= 0xffff0000;
-    }
-    else if (s == UCOL_SECONDARY) {
-        order &= 0xffffff00;
-    }
-
-    return order;
-}
-
-/* CollationElementIterator private constructors/destructors --------------- */
-
+#include "unicode/coleitr.h" 
+#include "unicode/tblcoll.h" 
+#include "unicode/ustring.h" 
+#include "cmemory.h" 
+#include "collation.h" 
+#include "collationdata.h" 
+#include "collationiterator.h" 
+#include "collationsets.h" 
+#include "collationtailoring.h" 
+#include "uassert.h" 
+#include "uhash.h" 
+#include "utf16collationiterator.h" 
+#include "uvectr32.h" 
+ 
+/* Constants --------------------------------------------------------------- */ 
+ 
+U_NAMESPACE_BEGIN 
+ 
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 
+ 
+/* CollationElementIterator public constructor/destructor ------------------ */ 
+ 
+CollationElementIterator::CollationElementIterator( 
+                                         const CollationElementIterator& other)  
+        : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) { 
+    *this = other; 
+} 
+ 
+CollationElementIterator::~CollationElementIterator() 
+{ 
+    delete iter_; 
+    delete offsets_; 
+} 
+ 
+/* CollationElementIterator public methods --------------------------------- */ 
+ 
+namespace { 
+ 
+uint32_t getFirstHalf(uint32_t p, uint32_t lower32) { 
+    return (p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff); 
+} 
+uint32_t getSecondHalf(uint32_t p, uint32_t lower32) { 
+    return (p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f); 
+} 
+UBool ceNeedsTwoParts(int64_t ce) { 
+    return (ce & INT64_C(0xffff00ff003f)) != 0; 
+} 
+ 
+}  // namespace 
+ 
+int32_t CollationElementIterator::getOffset() const 
+{ 
+    if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) { 
+        // CollationIterator::previousCE() decrements the CEs length 
+        // while it pops CEs from its internal buffer. 
+        int32_t i = iter_->getCEsLength(); 
+        if (otherHalf_ != 0) { 
+            // Return the trailing CE offset while we are in the middle of a 64-bit CE. 
+            ++i; 
+        } 
+        U_ASSERT(i < offsets_->size()); 
+        return offsets_->elementAti(i); 
+    } 
+    return iter_->getOffset(); 
+} 
+ 
+/** 
+* Get the ordering priority of the next character in the string. 
+* @return the next character's ordering. Returns NULLORDER if an error has  
+*         occurred or if the end of string has been reached 
+*/ 
+int32_t CollationElementIterator::next(UErrorCode& status) 
+{ 
+    if (U_FAILURE(status)) { return NULLORDER; } 
+    if (dir_ > 1) { 
+        // Continue forward iteration. Test this first. 
+        if (otherHalf_ != 0) { 
+            uint32_t oh = otherHalf_; 
+            otherHalf_ = 0; 
+            return oh; 
+        } 
+    } else if (dir_ == 1) { 
+        // next() after setOffset() 
+        dir_ = 2; 
+    } else if (dir_ == 0) { 
+        // The iter_ is already reset to the start of the text. 
+        dir_ = 2; 
+    } else /* dir_ < 0 */ { 
+        // illegal change of direction 
+        status = U_INVALID_STATE_ERROR; 
+        return NULLORDER; 
+    } 
+    // No need to keep all CEs in the buffer when we iterate. 
+    iter_->clearCEsIfNoneRemaining(); 
+    int64_t ce = iter_->nextCE(status); 
+    if (ce == Collation::NO_CE) { return NULLORDER; } 
+    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits. 
+    uint32_t p = (uint32_t)(ce >> 32); 
+    uint32_t lower32 = (uint32_t)ce; 
+    uint32_t firstHalf = getFirstHalf(p, lower32); 
+    uint32_t secondHalf = getSecondHalf(p, lower32); 
+    if (secondHalf != 0) { 
+        otherHalf_ = secondHalf | 0xc0;  // continuation CE 
+    } 
+    return firstHalf; 
+} 
+ 
+UBool CollationElementIterator::operator!=( 
+                                  const CollationElementIterator& other) const 
+{ 
+    return !(*this == other); 
+} 
+ 
+UBool CollationElementIterator::operator==( 
+                                    const CollationElementIterator& that) const 
+{ 
+    if (this == &that) { 
+        return TRUE; 
+    } 
+ 
+    return 
+        (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) && 
+        otherHalf_ == that.otherHalf_ && 
+        normalizeDir() == that.normalizeDir() && 
+        string_ == that.string_ && 
+        *iter_ == *that.iter_; 
+} 
+ 
 /** 
-* This is the "real" constructor for this class; it constructs an iterator
-* over the source text using the specified collator
-*/
-CollationElementIterator::CollationElementIterator(
-                                               const UnicodeString &source,
-                                               const RuleBasedCollator *coll,
-                                               UErrorCode &status)
-        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
-    setText(source, status);
-}
-
+* Get the ordering priority of the previous collation element in the string. 
+* @param status the error code status. 
+* @return the previous element's ordering. Returns NULLORDER if an error has  
+*         occurred or if the start of string has been reached. 
+*/ 
+int32_t CollationElementIterator::previous(UErrorCode& status) 
+{ 
+    if (U_FAILURE(status)) { return NULLORDER; } 
+    if (dir_ < 0) { 
+        // Continue backwards iteration. Test this first. 
+        if (otherHalf_ != 0) { 
+            uint32_t oh = otherHalf_; 
+            otherHalf_ = 0; 
+            return oh; 
+        } 
+    } else if (dir_ == 0) { 
+        iter_->resetToOffset(string_.length()); 
+        dir_ = -1; 
+    } else if (dir_ == 1) { 
+        // previous() after setOffset() 
+        dir_ = -1; 
+    } else /* dir_ > 1 */ { 
+        // illegal change of direction 
+        status = U_INVALID_STATE_ERROR; 
+        return NULLORDER; 
+    } 
+    if (offsets_ == NULL) { 
+        offsets_ = new UVector32(status); 
+        if (offsets_ == NULL) { 
+            status = U_MEMORY_ALLOCATION_ERROR; 
+            return NULLORDER; 
+        } 
+    } 
+    // If we already have expansion CEs, then we also have offsets. 
+    // Otherwise remember the trailing offset in case we need to 
+    // write offsets for an artificial expansion. 
+    int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0; 
+    int64_t ce = iter_->previousCE(*offsets_, status); 
+    if (ce == Collation::NO_CE) { return NULLORDER; } 
+    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits. 
+    uint32_t p = (uint32_t)(ce >> 32); 
+    uint32_t lower32 = (uint32_t)ce; 
+    uint32_t firstHalf = getFirstHalf(p, lower32); 
+    uint32_t secondHalf = getSecondHalf(p, lower32); 
+    if (secondHalf != 0) { 
+        if (offsets_->isEmpty()) { 
+            // When we convert a single 64-bit CE into two 32-bit CEs, 
+            // we need to make this artificial expansion behave like a normal expansion. 
+            // See CollationIterator::previousCE(). 
+            offsets_->addElement(iter_->getOffset(), status); 
+            offsets_->addElement(limitOffset, status); 
+        } 
+        otherHalf_ = firstHalf; 
+        return secondHalf | 0xc0;  // continuation CE 
+    } 
+    return firstHalf; 
+} 
+ 
 /** 
-* This is the "real" constructor for this class; it constructs an iterator over 
-* the source text using the specified collator
-*/
-CollationElementIterator::CollationElementIterator(
-                                           const CharacterIterator &source,
-                                           const RuleBasedCollator *coll,
-                                           UErrorCode &status)
-        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
-    // We only call source.getText() which should be const anyway.
-    setText(const_cast<CharacterIterator &>(source), status);
-}
-
-/* CollationElementIterator private methods -------------------------------- */
-
-const CollationElementIterator& CollationElementIterator::operator=(
-                                         const CollationElementIterator& other)
-{
-    if (this == &other) {
-        return *this;
-    }
-
-    CollationIterator *newIter;
-    const FCDUTF16CollationIterator *otherFCDIter =
-            dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);
-    if(otherFCDIter != NULL) {
-        newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer());
-    } else {
-        const UTF16CollationIterator *otherIter =
-                dynamic_cast<const UTF16CollationIterator *>(other.iter_);
-        if(otherIter != NULL) {
-            newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer());
-        } else {
-            newIter = NULL;
-        }
-    }
-    if(newIter != NULL) {
-        delete iter_;
-        iter_ = newIter;
-        rbc_ = other.rbc_;
-        otherHalf_ = other.otherHalf_;
-        dir_ = other.dir_;
-
-        string_ = other.string_;
-    }
-    if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {
-        UErrorCode errorCode = U_ZERO_ERROR;
-        if(offsets_ == NULL) {
-            offsets_ = new UVector32(other.offsets_->size(), errorCode);
-        }
-        if(offsets_ != NULL) {
-            offsets_->assign(*other.offsets_, errorCode);
-        }
-    }
-    return *this;
-}
-
-namespace {
-
-class MaxExpSink : public ContractionsAndExpansions::CESink {
-public:
-    MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}
-    virtual ~MaxExpSink();
-    virtual void handleCE(int64_t /*ce*/) {}
-    virtual void handleExpansion(const int64_t ces[], int32_t length) {
-        if (length <= 1) {
-            // We do not need to add single CEs into the map.
-            return;
-        }
-        int32_t count = 0;  // number of CE "halves"
-        for (int32_t i = 0; i < length; ++i) {
-            count += ceNeedsTwoParts(ces[i]) ? 2 : 1;
-        }
-        // last "half" of the last CE
-        int64_t ce = ces[length - 1];
-        uint32_t p = (uint32_t)(ce >> 32);
-        uint32_t lower32 = (uint32_t)ce;
-        uint32_t lastHalf = getSecondHalf(p, lower32);
-        if (lastHalf == 0) {
-            lastHalf = getFirstHalf(p, lower32);
-            U_ASSERT(lastHalf != 0);
-        } else {
-            lastHalf |= 0xc0;  // old-style continuation CE
-        }
-        if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {
-            uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);
-        }
-    }
-
-private:
-    UHashtable *maxExpansions;
-    UErrorCode &errorCode;
-};
-
-MaxExpSink::~MaxExpSink() {}
-
-}  // namespace
-
-UHashtable *
-CollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) {
-    if (U_FAILURE(errorCode)) { return NULL; }
-    UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,
-                                           uhash_compareLong, &errorCode);
-    if (U_FAILURE(errorCode)) { return NULL; }
-    MaxExpSink sink(maxExpansions, errorCode);
-    ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);
-    if (U_FAILURE(errorCode)) {
-        uhash_close(maxExpansions);
-        return NULL;
-    }
-    return maxExpansions;
-}
-
-int32_t
-CollationElementIterator::getMaxExpansion(int32_t order) const {
-    return getMaxExpansion(rbc_->tailoring->maxExpansions, order);
-}
-
-int32_t
-CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) {
-    if (order == 0) { return 1; }
-    int32_t max;
-    if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) {
-        return max;
-    }
-    if ((order & 0xc0) == 0xc0) {
-        // old-style continuation CE
-        return 2;
-    } else {
-        return 1;
-    }
-}
-
-U_NAMESPACE_END
-
-#endif /* #if !UCONFIG_NO_COLLATION */
+* Resets the cursor to the beginning of the string. 
+*/ 
+void CollationElementIterator::reset() 
+{ 
+    iter_ ->resetToOffset(0); 
+    otherHalf_ = 0; 
+    dir_ = 0; 
+} 
+ 
+void CollationElementIterator::setOffset(int32_t newOffset,  
+                                         UErrorCode& status) 
+{ 
+    if (U_FAILURE(status)) { return; } 
+    if (0 < newOffset && newOffset < string_.length()) { 
+        int32_t offset = newOffset; 
+        do { 
+            UChar c = string_.charAt(offset); 
+            if (!rbc_->isUnsafe(c) || 
+                    (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) { 
+                break; 
+            } 
+            // Back up to before this unsafe character. 
+            --offset; 
+        } while (offset > 0); 
+        if (offset < newOffset) { 
+            // We might have backed up more than necessary. 
+            // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe, 
+            // but for text "chu" setOffset(2) should remain at 2 
+            // although we initially back up to offset 0. 
+            // Find the last safe offset no greater than newOffset by iterating forward. 
+            int32_t lastSafeOffset = offset; 
+            do { 
+                iter_->resetToOffset(lastSafeOffset); 
+                do { 
+                    iter_->nextCE(status); 
+                    if (U_FAILURE(status)) { return; } 
+                } while ((offset = iter_->getOffset()) == lastSafeOffset); 
+                if (offset <= newOffset) { 
+                    lastSafeOffset = offset; 
+                } 
+            } while (offset < newOffset); 
+            newOffset = lastSafeOffset; 
+        } 
+    } 
+    iter_->resetToOffset(newOffset); 
+    otherHalf_ = 0; 
+    dir_ = 1; 
+} 
+ 
+/** 
+* Sets the source to the new source string. 
+*/ 
+void CollationElementIterator::setText(const UnicodeString& source, 
+                                       UErrorCode& status) 
+{ 
+    if (U_FAILURE(status)) { 
+        return; 
+    } 
+ 
+    string_ = source; 
+    const UChar *s = string_.getBuffer(); 
+    CollationIterator *newIter; 
+    UBool numeric = rbc_->settings->isNumeric(); 
+    if (rbc_->settings->dontCheckFCD()) { 
+        newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length()); 
+    } else { 
+        newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length()); 
+    } 
+    if (newIter == NULL) { 
+        status = U_MEMORY_ALLOCATION_ERROR; 
+        return; 
+    } 
+    delete iter_; 
+    iter_ = newIter; 
+    otherHalf_ = 0; 
+    dir_ = 0; 
+} 
+ 
+// Sets the source to the new character iterator. 
+void CollationElementIterator::setText(CharacterIterator& source,  
+                                       UErrorCode& status) 
+{ 
+    if (U_FAILURE(status))  
+        return; 
+ 
+    source.getText(string_); 
+    setText(string_, status); 
+} 
+ 
+int32_t CollationElementIterator::strengthOrder(int32_t order) const 
+{ 
+    UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength(); 
+    // Mask off the unwanted differences. 
+    if (s == UCOL_PRIMARY) { 
+        order &= 0xffff0000; 
+    } 
+    else if (s == UCOL_SECONDARY) { 
+        order &= 0xffffff00; 
+    } 
+ 
+    return order; 
+} 
+ 
+/* CollationElementIterator private constructors/destructors --------------- */ 
+ 
+/**  
+* This is the "real" constructor for this class; it constructs an iterator 
+* over the source text using the specified collator 
+*/ 
+CollationElementIterator::CollationElementIterator( 
+                                               const UnicodeString &source, 
+                                               const RuleBasedCollator *coll, 
+                                               UErrorCode &status) 
+        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) { 
+    setText(source, status); 
+} 
+ 
+/**  
+* Constructs an iterator over the text obtained from the given CharacterIterator  
+* (via its getText()), using the specified collator 
+*/ 
+CollationElementIterator::CollationElementIterator( 
+                                           const CharacterIterator &source, 
+                                           const RuleBasedCollator *coll, 
+                                           UErrorCode &status) 
+        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) { 
+    // We only call source.getText() which should be const anyway. 
+    setText(const_cast<CharacterIterator &>(source), status); 
+} 
+ 
+/* CollationElementIterator private methods -------------------------------- */ 
+ 
+const CollationElementIterator& CollationElementIterator::operator=( 
+                                         const CollationElementIterator& other) 
+{ 
+    if (this == &other) { 
+        return *this; 
+    } 
+ 
+    CollationIterator *newIter; 
+    const FCDUTF16CollationIterator *otherFCDIter = 
+            dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_); 
+    if(otherFCDIter != NULL) { 
+        newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer()); 
+    } else { 
+        const UTF16CollationIterator *otherIter = 
+                dynamic_cast<const UTF16CollationIterator *>(other.iter_); 
+        if(otherIter != NULL) { 
+            newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer()); 
+        } else { 
+            newIter = NULL; 
+        } 
+    } 
+    if(newIter != NULL) { 
+        delete iter_; 
+        iter_ = newIter; 
+        rbc_ = other.rbc_; 
+        otherHalf_ = other.otherHalf_; 
+        dir_ = other.dir_; 
+ 
+        string_ = other.string_; 
+    } 
+    if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) { 
+        UErrorCode errorCode = U_ZERO_ERROR; 
+        if(offsets_ == NULL) { 
+            offsets_ = new UVector32(other.offsets_->size(), errorCode); 
+        } 
+        if(offsets_ != NULL) { 
+            offsets_->assign(*other.offsets_, errorCode); 
+        } 
+    } 
+    return *this; 
+} 
+ 
+namespace { 
+ 
+class MaxExpSink : public ContractionsAndExpansions::CESink { 
+public: 
+    MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {} 
+    virtual ~MaxExpSink(); 
+    virtual void handleCE(int64_t /*ce*/) {} 
+    virtual void handleExpansion(const int64_t ces[], int32_t length) { 
+        if (length <= 1) { 
+            // We do not need to add single CEs into the map. 
+            return; 
+        } 
+        int32_t count = 0;  // number of CE "halves" 
+        for (int32_t i = 0; i < length; ++i) { 
+            count += ceNeedsTwoParts(ces[i]) ? 2 : 1; 
+        } 
+        // last "half" of the last CE 
+        int64_t ce = ces[length - 1]; 
+        uint32_t p = (uint32_t)(ce >> 32); 
+        uint32_t lower32 = (uint32_t)ce; 
+        uint32_t lastHalf = getSecondHalf(p, lower32); 
+        if (lastHalf == 0) { 
+            lastHalf = getFirstHalf(p, lower32); 
+            U_ASSERT(lastHalf != 0); 
+        } else { 
+            lastHalf |= 0xc0;  // old-style continuation CE 
+        } 
+        if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) { 
+            uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode); 
+        } 
+    } 
+ 
+private: 
+    UHashtable *maxExpansions; 
+    UErrorCode &errorCode; 
+}; 
+ 
+MaxExpSink::~MaxExpSink() {} 
+ 
+}  // namespace 
+ 
+UHashtable * 
+CollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) { 
+    if (U_FAILURE(errorCode)) { return NULL; } 
+    UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong, 
+                                           uhash_compareLong, &errorCode); 
+    if (U_FAILURE(errorCode)) { return NULL; } 
+    MaxExpSink sink(maxExpansions, errorCode); 
+    ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode); 
+    if (U_FAILURE(errorCode)) { 
+        uhash_close(maxExpansions); 
+        return NULL; 
+    } 
+    return maxExpansions; 
+} 
+ 
+int32_t 
+CollationElementIterator::getMaxExpansion(int32_t order) const { 
+    return getMaxExpansion(rbc_->tailoring->maxExpansions, order); 
+} 
+ 
+int32_t 
+CollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) { 
+    if (order == 0) { return 1; } 
+    int32_t max; 
+    if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) { 
+        return max; 
+    } 
+    if ((order & 0xc0) == 0xc0) { 
+        // old-style continuation CE 
+        return 2; 
+    } else { 
+        return 1; 
+    } 
+} 
+ 
+U_NAMESPACE_END 
+ 
+#endif /* #if !UCONFIG_NO_COLLATION */
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:23 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:23 +0300
commit	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (patch)
tree	83bf5c8c8047c42d8475e6095df90ccdc3d1b57f /contrib/libs/icu/i18n/coleitr.cpp
parent	d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (diff)
download	ydb-8f7cf138264e0caa318144bf8a2c950e0b0a8593.tar.gz