diff options
author | Sergey Uzhakov <uzhastik@gmail.com> | 2022-06-22 21:23:19 +0300 |
---|---|---|
committer | Sergey Uzhakov <uzhastik@gmail.com> | 2022-06-22 21:23:19 +0300 |
commit | b86c509aaa5b19d5659a4548395a47c9e87826e9 (patch) | |
tree | 98025d7bd574b788f6679b24f91711ea86bf07ae /contrib/libs/icu/common/ustr_wcs.cpp | |
parent | f55ada30d924b55d15fad9001944df1323a9598a (diff) | |
download | ydb-b86c509aaa5b19d5659a4548395a47c9e87826e9.tar.gz |
YQ-1154: allow pg translator in OSS
ref:bc7b8dcc7b45e5f527a87a1bed622dff6f06d41a
Diffstat (limited to 'contrib/libs/icu/common/ustr_wcs.cpp')
-rw-r--r-- | contrib/libs/icu/common/ustr_wcs.cpp | 535 |
1 files changed, 535 insertions, 0 deletions
diff --git a/contrib/libs/icu/common/ustr_wcs.cpp b/contrib/libs/icu/common/ustr_wcs.cpp new file mode 100644 index 0000000000..e9f278e969 --- /dev/null +++ b/contrib/libs/icu/common/ustr_wcs.cpp @@ -0,0 +1,535 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* +* Copyright (C) 2001-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ustr_wcs.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2004sep07 +* created by: Markus W. Scherer +* +* u_strToWCS() and u_strFromWCS() functions +* moved here from ustrtrns.c for better modularization. +*/ + +#include "unicode/utypes.h" +#include "unicode/ustring.h" +#include "cstring.h" +#include "cwchar.h" +#include "cmemory.h" +#include "ustr_imp.h" +#include "ustr_cnv.h" + +#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION + +#define _STACK_BUFFER_CAPACITY 1000 +#define _BUFFER_CAPACITY_MULTIPLIER 2 + +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. +// Then we could change this to work only with wchar_t buffers. +static inline UBool +u_growAnyBufferFromStatic(void *context, + void **pBuffer, int32_t *pCapacity, int32_t reqCapacity, + int32_t length, int32_t size) { + // Use char* not void* to avoid the compiler's strict-aliasing assumptions + // and related warnings. + char *newBuffer=(char *)uprv_malloc(reqCapacity*size); + if(newBuffer!=NULL) { + if(length>0) { + uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size); + } + *pCapacity=reqCapacity; + } else { + *pCapacity=0; + } + + /* release the old pBuffer if it was not statically allocated */ + if(*pBuffer!=(char *)context) { + uprv_free(*pBuffer); + } + + *pBuffer=newBuffer; + return (UBool)(newBuffer!=NULL); +} + +/* helper function */ +static wchar_t* +_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode){ + + char stackBuffer [_STACK_BUFFER_CAPACITY]; + char* tempBuf = stackBuffer; + int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY; + char* tempBufLimit = stackBuffer + tempBufCapacity; + UConverter* conv = NULL; + char* saveBuf = tempBuf; + wchar_t* intTarget=NULL; + int32_t intTargetCapacity=0; + int count=0,retVal=0; + + const UChar *pSrcLimit =NULL; + const UChar *pSrc = src; + + conv = u_getDefaultConverter(pErrorCode); + + if(U_FAILURE(*pErrorCode)){ + return NULL; + } + + if(srcLength == -1){ + srcLength = u_strlen(pSrc); + } + + pSrcLimit = pSrc + srcLength; + + for(;;) { + /* reset the error state */ + *pErrorCode = U_ZERO_ERROR; + + /* convert to chars using default converter */ + ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode); + count =(tempBuf - saveBuf); + + /* This should rarely occur */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ + tempBuf = saveBuf; + + /* we dont have enough room on the stack grow the buffer */ + int32_t newCapacity = 2 * srcLength; + if(newCapacity <= tempBufCapacity) { + newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity; + } + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, + newCapacity, count, 1)) { + goto cleanup; + } + + saveBuf = tempBuf; + tempBufLimit = tempBuf + tempBufCapacity; + tempBuf = tempBuf + count; + + } else { + break; + } + } + + if(U_FAILURE(*pErrorCode)){ + goto cleanup; + } + + /* done with conversion null terminate the char buffer */ + if(count>=tempBufCapacity){ + tempBuf = saveBuf; + /* we dont have enough room on the stack grow the buffer */ + if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, + count+1, count, 1)) { + goto cleanup; + } + saveBuf = tempBuf; + } + + saveBuf[count]=0; + + + /* allocate more space than required + * here we assume that every char requires + * no more than 2 wchar_ts + */ + intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */; + intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) ); + + if(intTarget){ + + int32_t nulLen = 0; + int32_t remaining = intTargetCapacity; + wchar_t* pIntTarget=intTarget; + tempBuf = saveBuf; + + /* now convert the mbs to wcs */ + for(;;){ + + /* we can call the system API since we are sure that + * there is atleast 1 null in the input + */ + retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining); + + if(retVal==-1){ + *pErrorCode = U_INVALID_CHAR_FOUND; + break; + }else if(retVal== remaining){/* should never occur */ + int numWritten = (pIntTarget-intTarget); + u_growAnyBufferFromStatic(NULL,(void**) &intTarget, + &intTargetCapacity, + intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER, + numWritten, + sizeof(wchar_t)); + pIntTarget = intTarget; + remaining=intTargetCapacity; + + if(nulLen!=count){ /*there are embedded nulls*/ + pIntTarget+=numWritten; + remaining-=numWritten; + } + + }else{ + int32_t nulVal; + /*scan for nulls */ + /* we donot check for limit since tempBuf is null terminated */ + while(tempBuf[nulLen++] != 0){ + } + nulVal = (nulLen < srcLength) ? 1 : 0; + pIntTarget = pIntTarget + retVal+nulVal; + remaining -=(retVal+nulVal); + + /* check if we have reached the source limit*/ + if(nulLen>=(count)){ + break; + } + } + } + count = (int32_t)(pIntTarget-intTarget); + + if(0 < count && count <= destCapacity){ + uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t)); + } + + if(pDestLength){ + *pDestLength = count; + } + + /* free the allocated memory */ + uprv_free(intTarget); + + }else{ + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + } +cleanup: + /* are we still using stack buffer */ + if(stackBuffer != saveBuf){ + uprv_free(saveBuf); + } + u_terminateWChars(dest,destCapacity,count,pErrorCode); + + u_releaseDefaultConverter(conv); + + return dest; +} +#endif + +U_CAPI wchar_t* U_EXPORT2 +u_strToWCS(wchar_t *dest, + int32_t destCapacity, + int32_t *pDestLength, + const UChar *src, + int32_t srcLength, + UErrorCode *pErrorCode){ + + /* args check */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + +#ifdef U_WCHAR_IS_UTF16 + /* wchar_t is UTF-16 just do a memcpy */ + if(srcLength == -1){ + srcLength = u_strlen(src); + } + if(0 < srcLength && srcLength <= destCapacity){ + u_memcpy((UChar *)dest, src, srcLength); + } + if(pDestLength){ + *pDestLength = srcLength; + } + + u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode); + + return dest; + +#elif defined U_WCHAR_IS_UTF32 + + return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength, + src, srcLength, pErrorCode); + +#else + + return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode); + +#endif + +} + +#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) +/* helper function */ +static UChar* +_strFromWCS( UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode) +{ + int32_t retVal =0, count =0 ; + UConverter* conv = NULL; + UChar* pTarget = NULL; + UChar* pTargetLimit = NULL; + UChar* target = NULL; + + UChar uStack [_STACK_BUFFER_CAPACITY]; + + wchar_t wStack[_STACK_BUFFER_CAPACITY]; + wchar_t* pWStack = wStack; + + + char cStack[_STACK_BUFFER_CAPACITY]; + int32_t cStackCap = _STACK_BUFFER_CAPACITY; + char* pCSrc=cStack; + char* pCSave=pCSrc; + char* pCSrcLimit=NULL; + + const wchar_t* pSrc = src; + const wchar_t* pSrcLimit = NULL; + + if(srcLength ==-1){ + /* if the wchar_t source is null terminated we can safely + * assume that there are no embedded nulls, this is a fast + * path for null terminated strings. + */ + for(;;){ + /* convert wchars to chars */ + retVal = uprv_wcstombs(pCSrc,src, cStackCap); + + if(retVal == -1){ + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + goto cleanup; + }else if(retVal >= (cStackCap-1)){ + /* Should rarely occur */ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char)); + pCSave = pCSrc; + }else{ + /* converted every thing */ + pCSrc = pCSrc+retVal; + break; + } + } + + }else{ + /* here the source is not null terminated + * so it may have nulls embeded and we need to + * do some extra processing + */ + int32_t remaining =cStackCap; + + pSrcLimit = src + srcLength; + + for(;;){ + int32_t nulLen = 0; + + /* find nulls in the string */ + while(nulLen<srcLength && pSrc[nulLen++]!=0){ + } + + if((pSrc+nulLen) < pSrcLimit){ + /* check if we have enough room in pCSrc */ + if(remaining < (nulLen * MB_CUR_MAX)){ + /* should rarely occur */ + int32_t len = (pCSrc-pCSave); + pCSrc = pCSave; + /* we do not have enough room so grow the buffer*/ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); + + pCSave = pCSrc; + pCSrc = pCSave+len; + remaining = cStackCap-(pCSrc - pCSave); + } + + /* we have found a null so convert the + * chunk from begining of non-null char to null + */ + retVal = uprv_wcstombs(pCSrc,pSrc,remaining); + + if(retVal==-1){ + /* an error occurred bail out */ + *pErrorCode = U_ILLEGAL_CHAR_FOUND; + goto cleanup; + } + + pCSrc += retVal+1 /* already null terminated */; + + pSrc += nulLen; /* skip past the null */ + srcLength-=nulLen; /* decrement the srcLength */ + remaining -= (pCSrc-pCSave); + + + }else{ + /* the source is not null terminated and we are + * end of source so we copy the source to a temp buffer + * null terminate it and convert wchar_ts to chars + */ + if(nulLen >= _STACK_BUFFER_CAPACITY){ + /* Should rarely occcur */ + /* allocate new buffer buffer */ + pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1)); + if(pWStack==NULL){ + *pErrorCode = U_MEMORY_ALLOCATION_ERROR; + goto cleanup; + } + } + if(nulLen>0){ + /* copy the contents to tempStack */ + uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t)); + } + + /* null terminate the tempBuffer */ + pWStack[nulLen] =0 ; + + if(remaining < (nulLen * MB_CUR_MAX)){ + /* Should rarely occur */ + int32_t len = (pCSrc-pCSave); + pCSrc = pCSave; + /* we do not have enough room so grow the buffer*/ + u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap, + cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char)); + + pCSave = pCSrc; + pCSrc = pCSave+len; + remaining = cStackCap-(pCSrc - pCSave); + } + /* convert to chars */ + retVal = uprv_wcstombs(pCSrc,pWStack,remaining); + + pCSrc += retVal; + pSrc += nulLen; + srcLength-=nulLen; /* decrement the srcLength */ + break; + } + } + } + + /* OK..now we have converted from wchar_ts to chars now + * convert chars to UChars + */ + pCSrcLimit = pCSrc; + pCSrc = pCSave; + pTarget = target= dest; + pTargetLimit = dest + destCapacity; + + conv= u_getDefaultConverter(pErrorCode); + + if(U_FAILURE(*pErrorCode)|| conv==NULL){ + goto cleanup; + } + + for(;;) { + + *pErrorCode = U_ZERO_ERROR; + + /* convert to stack buffer*/ + ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode); + + /* increment count to number written to stack */ + count+= pTarget - target; + + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){ + target = uStack; + pTarget = uStack; + pTargetLimit = uStack + _STACK_BUFFER_CAPACITY; + } else { + break; + } + + } + + if(pDestLength){ + *pDestLength =count; + } + + u_terminateUChars(dest,destCapacity,count,pErrorCode); + +cleanup: + + if(cStack != pCSave){ + uprv_free(pCSave); + } + + if(wStack != pWStack){ + uprv_free(pWStack); + } + + u_releaseDefaultConverter(conv); + + return dest; +} +#endif + +U_CAPI UChar* U_EXPORT2 +u_strFromWCS(UChar *dest, + int32_t destCapacity, + int32_t *pDestLength, + const wchar_t *src, + int32_t srcLength, + UErrorCode *pErrorCode) +{ + + /* args check */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if( (src==NULL && srcLength!=0) || srcLength < -1 || + (destCapacity<0) || (dest == NULL && destCapacity > 0) + ) { + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + +#ifdef U_WCHAR_IS_UTF16 + /* wchar_t is UTF-16 just do a memcpy */ + if(srcLength == -1){ + srcLength = u_strlen((const UChar *)src); + } + if(0 < srcLength && srcLength <= destCapacity){ + u_memcpy(dest, (const UChar *)src, srcLength); + } + if(pDestLength){ + *pDestLength = srcLength; + } + + u_terminateUChars(dest,destCapacity,srcLength,pErrorCode); + + return dest; + +#elif defined U_WCHAR_IS_UTF32 + + return u_strFromUTF32(dest, destCapacity, pDestLength, + (UChar32*)src, srcLength, pErrorCode); + +#else + + return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode); + +#endif + +} + +#endif /* #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32) && !UCONFIG_NO_CONVERSION */ |