diff options
author | vvvv <vvvv@yandex-team.ru> | 2022-02-11 19:00:01 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.ru> | 2022-02-11 19:00:01 +0300 |
commit | 4f5398551111a2b05b55de391d5d296bd0a670bf (patch) | |
tree | 422aa536507b05260da88883c27d1c9742a292fd /contrib/libs/icu/common/uset.cpp | |
parent | 0d99fc6efe15562b8474a702ab18e406ac102cdf (diff) | |
download | ydb-4f5398551111a2b05b55de391d5d296bd0a670bf.tar.gz |
YQL-13794 remove most of boost & ICU dependency
ref:eae1a1c30940ed737c886518182fd97bd872667d
Diffstat (limited to 'contrib/libs/icu/common/uset.cpp')
-rw-r--r-- | contrib/libs/icu/common/uset.cpp | 641 |
1 files changed, 0 insertions, 641 deletions
diff --git a/contrib/libs/icu/common/uset.cpp b/contrib/libs/icu/common/uset.cpp deleted file mode 100644 index eae7981d52..0000000000 --- a/contrib/libs/icu/common/uset.cpp +++ /dev/null @@ -1,641 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* -* Copyright (C) 2002-2011, International Business Machines -* Corporation and others. All Rights Reserved. -* -******************************************************************************* -* file name: uset.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2002mar07 -* created by: Markus W. Scherer -* -* There are functions to efficiently serialize a USet into an array of uint16_t -* and functions to use such a serialized form efficiently without -* instantiating a new USet. -*/ - -#include "unicode/utypes.h" -#include "unicode/uobject.h" -#include "unicode/uset.h" -#include "unicode/uniset.h" -#include "cmemory.h" -#include "unicode/ustring.h" -#include "unicode/parsepos.h" - -U_NAMESPACE_USE - -U_CAPI USet* U_EXPORT2 -uset_openEmpty() { - return (USet*) new UnicodeSet(); -} - -U_CAPI USet* U_EXPORT2 -uset_open(UChar32 start, UChar32 end) { - return (USet*) new UnicodeSet(start, end); -} - -U_CAPI void U_EXPORT2 -uset_close(USet* set) { - delete (UnicodeSet*) set; -} - -U_CAPI USet * U_EXPORT2 -uset_clone(const USet *set) { - return (USet*) (((UnicodeSet*) set)->UnicodeSet::clone()); -} - -U_CAPI UBool U_EXPORT2 -uset_isFrozen(const USet *set) { - return ((UnicodeSet*) set)->UnicodeSet::isFrozen(); -} - -U_CAPI void U_EXPORT2 -uset_freeze(USet *set) { - ((UnicodeSet*) set)->UnicodeSet::freeze(); -} - -U_CAPI USet * U_EXPORT2 -uset_cloneAsThawed(const USet *set) { - return (USet*) (((UnicodeSet*) set)->UnicodeSet::cloneAsThawed()); -} - -U_CAPI void U_EXPORT2 -uset_set(USet* set, - UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::set(start, end); -} - -U_CAPI void U_EXPORT2 -uset_addAll(USet* set, const USet *additionalSet) { - ((UnicodeSet*) set)->UnicodeSet::addAll(*((const UnicodeSet*)additionalSet)); -} - -U_CAPI void U_EXPORT2 -uset_add(USet* set, UChar32 c) { - ((UnicodeSet*) set)->UnicodeSet::add(c); -} - -U_CAPI void U_EXPORT2 -uset_addRange(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::add(start, end); -} - -U_CAPI void U_EXPORT2 -uset_addString(USet* set, const UChar* str, int32_t strLen) { - // UnicodeString handles -1 for strLen - UnicodeString s(strLen<0, str, strLen); - ((UnicodeSet*) set)->UnicodeSet::add(s); -} - -U_CAPI void U_EXPORT2 -uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen) { - // UnicodeString handles -1 for strLen - UnicodeString s(str, strLen); - ((UnicodeSet*) set)->UnicodeSet::addAll(s); -} - -U_CAPI void U_EXPORT2 -uset_remove(USet* set, UChar32 c) { - ((UnicodeSet*) set)->UnicodeSet::remove(c); -} - -U_CAPI void U_EXPORT2 -uset_removeRange(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::remove(start, end); -} - -U_CAPI void U_EXPORT2 -uset_removeString(USet* set, const UChar* str, int32_t strLen) { - UnicodeString s(strLen==-1, str, strLen); - ((UnicodeSet*) set)->UnicodeSet::remove(s); -} - -U_CAPI void U_EXPORT2 -uset_removeAll(USet* set, const USet* remove) { - ((UnicodeSet*) set)->UnicodeSet::removeAll(*(const UnicodeSet*)remove); -} - -U_CAPI void U_EXPORT2 -uset_retain(USet* set, UChar32 start, UChar32 end) { - ((UnicodeSet*) set)->UnicodeSet::retain(start, end); -} - -U_CAPI void U_EXPORT2 -uset_retainAll(USet* set, const USet* retain) { - ((UnicodeSet*) set)->UnicodeSet::retainAll(*(const UnicodeSet*)retain); -} - -U_CAPI void U_EXPORT2 -uset_compact(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::compact(); -} - -U_CAPI void U_EXPORT2 -uset_complement(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::complement(); -} - -U_CAPI void U_EXPORT2 -uset_complementAll(USet* set, const USet* complement) { - ((UnicodeSet*) set)->UnicodeSet::complementAll(*(const UnicodeSet*)complement); -} - -U_CAPI void U_EXPORT2 -uset_clear(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::clear(); -} - -U_CAPI void U_EXPORT2 -uset_removeAllStrings(USet* set) { - ((UnicodeSet*) set)->UnicodeSet::removeAllStrings(); -} - -U_CAPI UBool U_EXPORT2 -uset_isEmpty(const USet* set) { - return ((const UnicodeSet*) set)->UnicodeSet::isEmpty(); -} - -U_CAPI UBool U_EXPORT2 -uset_contains(const USet* set, UChar32 c) { - return ((const UnicodeSet*) set)->UnicodeSet::contains(c); -} - -U_CAPI UBool U_EXPORT2 -uset_containsRange(const USet* set, UChar32 start, UChar32 end) { - return ((const UnicodeSet*) set)->UnicodeSet::contains(start, end); -} - -U_CAPI UBool U_EXPORT2 -uset_containsString(const USet* set, const UChar* str, int32_t strLen) { - UnicodeString s(strLen==-1, str, strLen); - return ((const UnicodeSet*) set)->UnicodeSet::contains(s); -} - -U_CAPI UBool U_EXPORT2 -uset_containsAll(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsAll(* (const UnicodeSet*) set2); -} - -U_CAPI UBool U_EXPORT2 -uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen) { - // Create a string alias, since nothing is being added to the set. - UnicodeString s(strLen==-1, str, strLen); - return ((const UnicodeSet*) set)->UnicodeSet::containsAll(s); -} - -U_CAPI UBool U_EXPORT2 -uset_containsNone(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsNone(* (const UnicodeSet*) set2); -} - -U_CAPI UBool U_EXPORT2 -uset_containsSome(const USet* set1, const USet* set2) { - return ((const UnicodeSet*) set1)->UnicodeSet::containsSome(* (const UnicodeSet*) set2); -} - -U_CAPI int32_t U_EXPORT2 -uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::span(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanBack(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanUTF8(s, length, spanCondition); -} - -U_CAPI int32_t U_EXPORT2 -uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition) { - return ((UnicodeSet*) set)->UnicodeSet::spanBackUTF8(s, length, spanCondition); -} - -U_CAPI UBool U_EXPORT2 -uset_equals(const USet* set1, const USet* set2) { - return *(const UnicodeSet*)set1 == *(const UnicodeSet*)set2; -} - -U_CAPI int32_t U_EXPORT2 -uset_indexOf(const USet* set, UChar32 c) { - return ((UnicodeSet*) set)->UnicodeSet::indexOf(c); -} - -U_CAPI UChar32 U_EXPORT2 -uset_charAt(const USet* set, int32_t index) { - return ((UnicodeSet*) set)->UnicodeSet::charAt(index); -} - -U_CAPI int32_t U_EXPORT2 -uset_size(const USet* set) { - return ((const UnicodeSet*) set)->UnicodeSet::size(); -} - -U_NAMESPACE_BEGIN -/** - * This class only exists to provide access to the UnicodeSet private - * USet support API. Declaring a class a friend is more portable than - * trying to declare extern "C" functions as friends. - */ -class USetAccess /* not : public UObject because all methods are static */ { -public: - /* Try to have the compiler inline these*/ - inline static int32_t getStringCount(const UnicodeSet& set) { - return set.stringsSize(); - } - inline static const UnicodeString* getString(const UnicodeSet& set, - int32_t i) { - return set.getString(i); - } -private: - /* do not instantiate*/ - USetAccess(); -}; -U_NAMESPACE_END - -U_CAPI int32_t U_EXPORT2 -uset_getItemCount(const USet* uset) { - const UnicodeSet& set = *(const UnicodeSet*)uset; - return set.getRangeCount() + USetAccess::getStringCount(set); -} - -U_CAPI int32_t U_EXPORT2 -uset_getItem(const USet* uset, int32_t itemIndex, - UChar32* start, UChar32* end, - UChar* str, int32_t strCapacity, - UErrorCode* ec) { - if (U_FAILURE(*ec)) return 0; - const UnicodeSet& set = *(const UnicodeSet*)uset; - int32_t rangeCount; - - if (itemIndex < 0) { - *ec = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } else if (itemIndex < (rangeCount = set.getRangeCount())) { - *start = set.getRangeStart(itemIndex); - *end = set.getRangeEnd(itemIndex); - return 0; - } else { - itemIndex -= rangeCount; - if (itemIndex < USetAccess::getStringCount(set)) { - const UnicodeString* s = USetAccess::getString(set, itemIndex); - return s->extract(str, strCapacity, *ec); - } else { - *ec = U_INDEX_OUTOFBOUNDS_ERROR; - return -1; - } - } -} - -//U_CAPI int32_t U_EXPORT2 -//uset_getRangeCount(const USet* set) { -// return ((const UnicodeSet*) set)->getRangeCount(); -//} -// -//U_CAPI UBool U_EXPORT2 -//uset_getRange(const USet* set, int32_t rangeIndex, -// UChar32* pStart, UChar32* pEnd) { -// if ((uint32_t) rangeIndex >= (uint32_t) uset_getRangeCount(set)) { -// return FALSE; -// } -// const UnicodeSet* us = (const UnicodeSet*) set; -// *pStart = us->getRangeStart(rangeIndex); -// *pEnd = us->getRangeEnd(rangeIndex); -// return TRUE; -//} - -/* - * Serialize a USet into 16-bit units. - * Store BMP code points as themselves with one 16-bit unit each. - * - * Important: the code points in the array are in ascending order, - * therefore all BMP code points precede all supplementary code points. - * - * Store each supplementary code point in 2 16-bit units, - * simply with higher-then-lower 16-bit halfs. - * - * Precede the entire list with the length. - * If there are supplementary code points, then set bit 15 in the length - * and add the bmpLength between it and the array. - * - * In other words: - * - all BMP: (length=bmpLength) BMP, .., BMP - * - some supplementary: (length|0x8000) (bmpLength<length) BMP, .., BMP, supp-high, supp-low, .. - */ -U_CAPI int32_t U_EXPORT2 -uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* ec) { - if (ec==NULL || U_FAILURE(*ec)) { - return 0; - } - - return ((const UnicodeSet*) set)->UnicodeSet::serialize(dest, destCapacity,* ec); -} - -U_CAPI UBool U_EXPORT2 -uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength) { - int32_t length; - - if(fillSet==NULL) { - return FALSE; - } - if(src==NULL || srcLength<=0) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - - length=*src++; - if(length&0x8000) { - /* there are supplementary values */ - length&=0x7fff; - if(srcLength<(2+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=*src++; - } else { - /* only BMP values */ - if(srcLength<(1+length)) { - fillSet->length=fillSet->bmpLength=0; - return FALSE; - } - fillSet->bmpLength=length; - } - fillSet->array=src; - fillSet->length=length; - return TRUE; -} - -U_CAPI void U_EXPORT2 -uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c) { - if(fillSet==NULL || (uint32_t)c>0x10ffff) { - return; - } - - fillSet->array=fillSet->staticArray; - if(c<0xffff) { - fillSet->bmpLength=fillSet->length=2; - fillSet->staticArray[0]=(uint16_t)c; - fillSet->staticArray[1]=(uint16_t)c+1; - } else if(c==0xffff) { - fillSet->bmpLength=1; - fillSet->length=3; - fillSet->staticArray[0]=0xffff; - fillSet->staticArray[1]=1; - fillSet->staticArray[2]=0; - } else if(c<0x10ffff) { - fillSet->bmpLength=0; - fillSet->length=4; - fillSet->staticArray[0]=(uint16_t)(c>>16); - fillSet->staticArray[1]=(uint16_t)c; - ++c; - fillSet->staticArray[2]=(uint16_t)(c>>16); - fillSet->staticArray[3]=(uint16_t)c; - } else /* c==0x10ffff */ { - fillSet->bmpLength=0; - fillSet->length=2; - fillSet->staticArray[0]=0x10; - fillSet->staticArray[1]=0xffff; - } -} - -U_CAPI UBool U_EXPORT2 -uset_serializedContains(const USerializedSet* set, UChar32 c) { - const uint16_t* array; - - if(set==NULL || (uint32_t)c>0x10ffff) { - return FALSE; - } - - array=set->array; - if(c<=0xffff) { - /* find c in the BMP part */ - int32_t lo = 0; - int32_t hi = set->bmpLength-1; - if (c < array[0]) { - hi = 0; - } else if (c < array[hi]) { - for(;;) { - int32_t i = (lo + hi) >> 1; - if (i == lo) { - break; // Done! - } else if (c < array[i]) { - hi = i; - } else { - lo = i; - } - } - } else { - hi += 1; - } - return (UBool)(hi&1); - } else { - /* find c in the supplementary part */ - uint16_t high=(uint16_t)(c>>16), low=(uint16_t)c; - int32_t base = set->bmpLength; - int32_t lo = 0; - int32_t hi = set->length - 2 - base; - if (high < array[base] || (high==array[base] && low<array[base+1])) { - hi = 0; - } else if (high < array[base+hi] || (high==array[base+hi] && low<array[base+hi+1])) { - for (;;) { - int32_t i = ((lo + hi) >> 1) & ~1; // Guarantee even result - int32_t iabs = i + base; - if (i == lo) { - break; // Done! - } else if (high < array[iabs] || (high==array[iabs] && low<array[iabs+1])) { - hi = i; - } else { - lo = i; - } - } - } else { - hi += 2; - } - /* count pairs of 16-bit units even per BMP and check if the number of pairs is odd */ - return (UBool)(((hi+(base<<1))&2)!=0); - } -} - -U_CAPI int32_t U_EXPORT2 -uset_getSerializedRangeCount(const USerializedSet* set) { - if(set==NULL) { - return 0; - } - - return (set->bmpLength+(set->length-set->bmpLength)/2+1)/2; -} - -U_CAPI UBool U_EXPORT2 -uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, - UChar32* pStart, UChar32* pEnd) { - const uint16_t* array; - int32_t bmpLength, length; - - if(set==NULL || rangeIndex<0 || pStart==NULL || pEnd==NULL) { - return FALSE; - } - - array=set->array; - length=set->length; - bmpLength=set->bmpLength; - - rangeIndex*=2; /* address start/limit pairs */ - if(rangeIndex<bmpLength) { - *pStart=array[rangeIndex++]; - if(rangeIndex<bmpLength) { - *pEnd=array[rangeIndex]-1; - } else if(rangeIndex<length) { - *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1; - } else { - *pEnd=0x10ffff; - } - return TRUE; - } else { - rangeIndex-=bmpLength; - rangeIndex*=2; /* address pairs of pairs of units */ - length-=bmpLength; - if(rangeIndex<length) { - array+=bmpLength; - *pStart=(((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1]; - rangeIndex+=2; - if(rangeIndex<length) { - *pEnd=((((int32_t)array[rangeIndex])<<16)|array[rangeIndex+1])-1; - } else { - *pEnd=0x10ffff; - } - return TRUE; - } else { - return FALSE; - } - } -} - -// TODO The old, internal uset.c had an efficient uset_containsOne function. -// Returned the one and only code point, or else -1 or something. -// Consider adding such a function to both C and C++ UnicodeSet/uset. -// See tools/gennorm/store.c for usage, now usetContainsOne there. - -// TODO Investigate incorporating this code into UnicodeSet to improve -// efficiency. -// --- -// #define USET_GROW_DELTA 20 -// -// static int32_t -// findChar(const UChar32* array, int32_t length, UChar32 c) { -// int32_t i; -// -// /* check the last range limit first for more efficient appending */ -// if(length>0) { -// if(c>=array[length-1]) { -// return length; -// } -// -// /* do not check the last range limit again in the loop below */ -// --length; -// } -// -// for(i=0; i<length && c>=array[i]; ++i) {} -// return i; -// } -// -// static UBool -// addRemove(USet* set, UChar32 c, int32_t doRemove) { -// int32_t i, length, more; -// -// if(set==NULL || (uint32_t)c>0x10ffff) { -// return FALSE; -// } -// -// length=set->length; -// i=findChar(set->array, length, c); -// if((i&1)^doRemove) { -// /* c is already in the set */ -// return TRUE; -// } -// -// /* how many more array items do we need? */ -// if(i<length && (c+1)==set->array[i]) { -// /* c is just before the following range, extend that in-place by one */ -// set->array[i]=c; -// if(i>0) { -// --i; -// if(c==set->array[i]) { -// /* the previous range collapsed, remove it */ -// set->length=length-=2; -// if(i<length) { -// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4); -// } -// } -// } -// return TRUE; -// } else if(i>0 && c==set->array[i-1]) { -// /* c is just after the previous range, extend that in-place by one */ -// if(++c<=0x10ffff) { -// set->array[i-1]=c; -// if(i<length && c==set->array[i]) { -// /* the following range collapsed, remove it */ -// --i; -// set->length=length-=2; -// if(i<length) { -// uprv_memmove(set->array+i, set->array+i+2, (length-i)*4); -// } -// } -// } else { -// /* extend the previous range (had limit 0x10ffff) to the end of Unicode */ -// set->length=i-1; -// } -// return TRUE; -// } else if(i==length && c==0x10ffff) { -// /* insert one range limit c */ -// more=1; -// } else { -// /* insert two range limits c, c+1 */ -// more=2; -// } -// -// /* insert <more> range limits */ -// if(length+more>set->capacity) { -// /* reallocate */ -// int32_t newCapacity=set->capacity+set->capacity/2+USET_GROW_DELTA; -// UChar32* newArray=(UChar32* )uprv_malloc(newCapacity*4); -// if(newArray==NULL) { -// return FALSE; -// } -// set->capacity=newCapacity; -// uprv_memcpy(newArray, set->array, length*4); -// -// if(set->array!=set->staticBuffer) { -// uprv_free(set->array); -// } -// set->array=newArray; -// } -// -// if(i<length) { -// uprv_memmove(set->array+i+more, set->array+i, (length-i)*4); -// } -// set->array[i]=c; -// if(more==2) { -// set->array[i+1]=c+1; -// } -// set->length+=more; -// -// return TRUE; -// } -// -// U_CAPI UBool U_EXPORT2 -// uset_add(USet* set, UChar32 c) { -// return addRemove(set, c, 0); -// } -// -// U_CAPI void U_EXPORT2 -// uset_remove(USet* set, UChar32 c) { -// addRemove(set, c, 1); -// } |