YQ-1154: allow pg translator in OSS

ref:bc7b8dcc7b45e5f527a87a1bed622dff6f06d41a
author: Sergey Uzhakov <uzhastik@gmail.com> 2022-06-22 21:23:19 +0300
committer: Sergey Uzhakov <uzhastik@gmail.com> 2022-06-22 21:23:19 +0300
commit: b86c509aaa5b19d5659a4548395a47c9e87826e9 (patch)
tree: 98025d7bd574b788f6679b24f91711ea86bf07ae /contrib/libs/icu/common/ustr_wcs.cpp
parent: f55ada30d924b55d15fad9001944df1323a9598a (diff)
download: ydb-b86c509aaa5b19d5659a4548395a47c9e87826e9.tar.gz
1 files changed, 535 insertions, 0 deletions
diff --git a/contrib/libs/icu/common/ustr_wcs.cpp b/contrib/libs/icu/common/ustr_wcs.cpp
new file mode 100644
index 0000000000..e9f278e969
--- /dev/null
+++ b/contrib/libs/icu/common/ustr_wcs.cpp
@@ -0,0 +1,535 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2001-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ustr_wcs.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004sep07
+*   created by: Markus W. Scherer
+*
+*   u_strToWCS() and u_strFromWCS() functions
+*   moved here from ustrtrns.c for better modularization.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "cwchar.h"
+#include "cmemory.h"
+#include "ustr_imp.h"
+#include "ustr_cnv.h"
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+
+#define _STACK_BUFFER_CAPACITY 1000
+#define _BUFFER_CAPACITY_MULTIPLIER 2
+
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+// Replaces *pBuffer with a new uprv_malloc'ed block of reqCapacity*size bytes, copying the first length*size bytes; context is the caller's original (static) buffer, which is never freed here.
+// TODO: We should use CharString for char buffers and UnicodeString for UChar buffers. Then we could change this to work only with wchar_t buffers.
+static inline UBool 
+u_growAnyBufferFromStatic(void *context,
+                       void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
+                       int32_t length, int32_t size) {
+    // Use char* not void* to avoid the compiler's strict-aliasing assumptions
+    // and related warnings. NOTE(review): reqCapacity*size below is an unchecked int32_t multiplication.
+    char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
+    if(newBuffer!=NULL) {
+        if(length>0) {
+            uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
+        }
+        *pCapacity=reqCapacity;
+    } else {
+        *pCapacity=0;
+    }
+
+    /* release the old buffer unless it is the caller's static one (context); this runs even when the allocation above failed, and *pBuffer then becomes NULL */
+    if(*pBuffer!=(char *)context) {
+        uprv_free(*pBuffer);
+    }
+
+    *pBuffer=newBuffer;
+    return (UBool)(newBuffer!=NULL);
+}
+
+/* helper function: converts UTF-16 src to chars with the default converter, then to wchar_t with uprv_mbstowcs(); compiled only when wchar_t is neither UTF-16 nor UTF-32 */
+static wchar_t* 
+_strToWCS(wchar_t *dest, 
+           int32_t destCapacity,
+           int32_t *pDestLength,
+           const UChar *src, 
+           int32_t srcLength,
+           UErrorCode *pErrorCode){
+
+    char stackBuffer [_STACK_BUFFER_CAPACITY];
+    char* tempBuf = stackBuffer;
+    int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
+    char* tempBufLimit = stackBuffer + tempBufCapacity;
+    UConverter* conv = NULL;
+    char* saveBuf = tempBuf;
+    wchar_t* intTarget=NULL;
+    int32_t intTargetCapacity=0;
+    int count=0,retVal=0;
+    
+    const UChar *pSrcLimit =NULL;
+    const UChar *pSrc = src;
+
+    conv = u_getDefaultConverter(pErrorCode);
+    
+    if(U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+    
+    if(srcLength == -1){
+        srcLength = u_strlen(pSrc);
+    }
+    
+    pSrcLimit = pSrc + srcLength;
+
+    for(;;) {
+        /* reset the error state (a previous pass may have left U_BUFFER_OVERFLOW_ERROR) */
+        *pErrorCode = U_ZERO_ERROR;
+
+        /* convert to chars using default converter; tempBuf and pSrc are advanced by the call */
+        ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,NULL,(UBool)(pSrc==pSrcLimit),pErrorCode);
+        count =(tempBuf - saveBuf);
+        
+        /* This should rarely occur */
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+            tempBuf = saveBuf;
+            
+            /* we don't have enough room on the stack, so grow the buffer; NOTE(review): on failure we jump to cleanup with *pErrorCode still U_BUFFER_OVERFLOW_ERROR and saveBuf not updated - confirm cleanup is safe */
+            int32_t newCapacity = 2 * srcLength;
+            if(newCapacity <= tempBufCapacity) {
+                newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
+            }
+            if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
+                    newCapacity, count, 1)) {
+                goto cleanup;
+            }
+          
+           saveBuf = tempBuf;
+           tempBufLimit = tempBuf + tempBufCapacity;
+           tempBuf = tempBuf + count;
+
+        } else {
+            break;
+        }
+    }
+
+    if(U_FAILURE(*pErrorCode)){
+        goto cleanup;
+    }
+
+    /* done with conversion; null-terminate the char buffer */
+    if(count>=tempBufCapacity){
+        tempBuf = saveBuf;
+        /* the buffer is exactly full, so grow it by one byte to make room for the terminator */
+        if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity, 
+                count+1, count, 1)) {
+            goto cleanup;
+        }              
+       saveBuf = tempBuf;
+    }
+    
+    saveBuf[count]=0;
+      
+
+    /* allocate more space than required 
+     * here we assume that every char requires 
+     * no more than 2 wchar_ts
+     */
+    intTargetCapacity =  (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
+    intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
+
+    if(intTarget){
+
+        int32_t nulLen = 0;
+        int32_t remaining = intTargetCapacity;
+        wchar_t* pIntTarget=intTarget;
+        tempBuf = saveBuf;
+        
+        /* now convert the mbs to wcs, one NUL-terminated segment per pass */
+        for(;;){
+            
+            /* we can call the system API since we are sure that
+             * there is at least 1 null in the input
+             */
+            retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
+            
+            if(retVal==-1){
+                *pErrorCode = U_INVALID_CHAR_FOUND;
+                break;
+            }else if(retVal== remaining){/* should never occur; NOTE(review): the result of the grow call below is not checked */
+                int numWritten = (pIntTarget-intTarget);
+                u_growAnyBufferFromStatic(NULL,(void**) &intTarget,
+                                          &intTargetCapacity,
+                                          intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
+                                          numWritten,
+                                          sizeof(wchar_t));
+                pIntTarget = intTarget;
+                remaining=intTargetCapacity;
+
+                if(nulLen!=count){ /*there are embedded nulls*/
+                    pIntTarget+=numWritten;
+                    remaining-=numWritten;
+                }
+
+            }else{
+                int32_t nulVal;
+                /* scan for nulls: advance nulLen just past the next NUL in tempBuf */
+                /* we do not check for limit since tempBuf is null terminated */
+                while(tempBuf[nulLen++] != 0){
+                }
+                nulVal = (nulLen < srcLength) ? 1 : 0; 
+                pIntTarget = pIntTarget + retVal+nulVal;
+                remaining -=(retVal+nulVal);
+            
+                /* check if we have reached the source limit*/
+                if(nulLen>=(count)){
+                    break;
+                }
+            }
+        }
+        count = (int32_t)(pIntTarget-intTarget);
+       
+        if(0 < count && count <= destCapacity){
+            uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
+        }  
+
+        if(pDestLength){
+            *pDestLength = count;
+        }
+
+        /* free the allocated memory */
+        uprv_free(intTarget);
+
+    }else{
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+    }
+cleanup:
+    /* free the char buffer if it is no longer the stack buffer */
+    if(stackBuffer != saveBuf){
+        uprv_free(saveBuf);
+    }
+    u_terminateWChars(dest,destCapacity,count,pErrorCode);
+
+    u_releaseDefaultConverter(conv);
+
+    return dest;
+}
+#endif
+
+U_CAPI wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest, 
+           int32_t destCapacity,
+           int32_t *pDestLength,
+           const UChar *src, 
+           int32_t srcLength,
+           UErrorCode *pErrorCode){
+
+    /* args check: a NULL or already-failed pErrorCode returns NULL without touching anything; bad arguments set U_ILLEGAL_ARGUMENT_ERROR. srcLength==-1 means src is NUL-terminated */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+        
+    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+        (destCapacity<0) || (dest == NULL && destCapacity > 0)
+    ) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+    
+#ifdef U_WCHAR_IS_UTF16
+    /* wchar_t is UTF-16: just do a memcpy; nothing is copied when src does not fit, but *pDestLength still reports srcLength */
+    if(srcLength == -1){
+        srcLength = u_strlen(src);
+    }
+    if(0 < srcLength && srcLength <= destCapacity){
+        u_memcpy((UChar *)dest, src, srcLength);
+    }
+    if(pDestLength){
+       *pDestLength = srcLength;
+    }
+
+    u_terminateUChars((UChar *)dest,destCapacity,srcLength,pErrorCode);
+
+    return dest;
+
+#elif defined U_WCHAR_IS_UTF32
+    
+    return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
+                                  src, srcLength, pErrorCode);
+
+#else
+    
+    return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
+    
+#endif
+
+}
+
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+/* helper function: converts wchar_t src to chars with uprv_wcstombs(), then to UTF-16 with the default converter; compiled only when wchar_t is neither UTF-16 nor UTF-32 */
+static UChar* 
+_strFromWCS( UChar   *dest,
+             int32_t destCapacity, 
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode)
+{
+    int32_t retVal =0, count =0 ;
+    UConverter* conv = NULL;
+    UChar* pTarget = NULL;
+    UChar* pTargetLimit = NULL;
+    UChar* target = NULL;
+    
+    UChar uStack [_STACK_BUFFER_CAPACITY];
+
+    wchar_t wStack[_STACK_BUFFER_CAPACITY];
+    wchar_t* pWStack = wStack;
+
+
+    char cStack[_STACK_BUFFER_CAPACITY];
+    int32_t cStackCap = _STACK_BUFFER_CAPACITY;
+    char* pCSrc=cStack;
+    char* pCSave=pCSrc;
+    char* pCSrcLimit=NULL;
+
+    const wchar_t* pSrc = src;
+    const wchar_t* pSrcLimit = NULL;
+
+    if(srcLength ==-1){
+        /* if the wchar_t source is null terminated we can safely
+         * assume that there are no embedded nulls, this is a fast
+         * path for null terminated strings.
+         */
+        for(;;){
+            /* convert wchars to chars, restarting from the beginning of src on every pass */
+            retVal = uprv_wcstombs(pCSrc,src, cStackCap);
+    
+            if(retVal == -1){
+                *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                goto cleanup;
+            }else if(retVal >= (cStackCap-1)){
+                /* Should rarely occur; the output may be truncated, so double the buffer and retry. NOTE(review): the grow result is not checked */
+                u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                    cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
+                pCSave = pCSrc;
+            }else{
+                /* converted everything */
+                pCSrc = pCSrc+retVal;
+                break;
+            }
+        }
+        
+    }else{
+        /* here the source is not null terminated 
+         * so it may have nulls embedded and we need to
+         * do some extra processing 
+         */
+        int32_t remaining =cStackCap;
+        
+        pSrcLimit = src + srcLength;
+
+        for(;;){
+            int32_t nulLen = 0;
+
+            /* find nulls in the string: nulLen ends just past the next NUL, or at srcLength if there is none */
+            while(nulLen<srcLength && pSrc[nulLen++]!=0){
+            }
+
+            if((pSrc+nulLen) < pSrcLimit){
+                /* check if we have enough room in pCSrc */
+                if(remaining < (nulLen * MB_CUR_MAX)){
+                    /* should rarely occur */
+                    int32_t len = (pCSrc-pCSave);
+                    pCSrc = pCSave;
+                    /* we do not have enough room so grow the buffer; NOTE(review): the grow result is not checked */
+                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                           _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
+
+                    pCSave = pCSrc;
+                    pCSrc = pCSave+len;
+                    remaining = cStackCap-(pCSrc - pCSave);
+                }
+
+                /* we have found a null  so convert the 
+                 * chunk from beginning of non-null char to null
+                 */
+                retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
+
+                if(retVal==-1){
+                    /* an error occurred bail out */
+                    *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                    goto cleanup;
+                }
+
+                pCSrc += retVal+1 /* already null terminated */;
+
+                pSrc += nulLen; /* skip past the null */
+                srcLength-=nulLen; /* decrement the srcLength; NOTE(review): the next line subtracts the whole offset pCSrc-pCSave from remaining, not just this chunk - confirm */
+                remaining -= (pCSrc-pCSave);
+
+
+            }else{
+                /* the source is not null terminated and we are 
+                 * end of source so we copy the source to a temp buffer
+                 * null terminate it and convert wchar_ts to chars
+                 */
+                if(nulLen >= _STACK_BUFFER_CAPACITY){
+                    /* Should rarely occur */
+                    /* allocate a new buffer; wStack is too small for nulLen chars plus the terminator */
+                    pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
+                    if(pWStack==NULL){
+                        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+                        goto cleanup;
+                    }
+                }
+                if(nulLen>0){
+                    /* copy the contents to tempStack */
+                    uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
+                }
+            
+                /* null terminate the tempBuffer */
+                pWStack[nulLen] =0 ;
+            
+                if(remaining < (nulLen * MB_CUR_MAX)){
+                    /* Should rarely occur */
+                    int32_t len = (pCSrc-pCSave);
+                    pCSrc = pCSave;
+                    /* we do not have enough room so grow the buffer; NOTE(review): the grow result is not checked */
+                    u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
+                           cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
+
+                    pCSave = pCSrc;
+                    pCSrc = pCSave+len;
+                    remaining = cStackCap-(pCSrc - pCSave);
+                }
+                /* convert to chars; NOTE(review): unlike the other uprv_wcstombs calls, retVal is not checked for -1 here */
+                retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
+            
+                pCSrc += retVal;
+                pSrc  += nulLen;
+                srcLength-=nulLen; /* decrement the srcLength */
+                break;
+            }
+        }
+    }
+
+    /* OK..now we have converted from wchar_ts to chars now 
+     * convert chars to UChars 
+     */
+    pCSrcLimit = pCSrc;
+    pCSrc = pCSave;
+    pTarget = target= dest;
+    pTargetLimit = dest + destCapacity;    
+    
+    conv= u_getDefaultConverter(pErrorCode);
+    
+    if(U_FAILURE(*pErrorCode)|| conv==NULL){
+        goto cleanup;
+    }
+    
+    for(;;) {
+        
+        *pErrorCode = U_ZERO_ERROR;
+        
+        /* convert into dest first; after an overflow the output goes to the uStack scratch buffer (see the branch below) */
+        ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,NULL,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
+        
+        /* add the number of UChars written in this pass */
+        count+= pTarget - target;
+        
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
+            target = uStack;
+            pTarget = uStack;
+            pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
+        } else {
+            break;
+        }
+        
+    }
+    
+    if(pDestLength){
+        *pDestLength =count;
+    }
+
+    u_terminateUChars(dest,destCapacity,count,pErrorCode);
+    
+cleanup:
+ 
+    if(cStack != pCSave){
+        uprv_free(pCSave);
+    }
+
+    if(wStack != pWStack){
+        uprv_free(pWStack);
+    }
+    
+    u_releaseDefaultConverter(conv);
+
+    return dest;
+}
+#endif
+
+U_CAPI UChar* U_EXPORT2
+u_strFromWCS(UChar   *dest,
+             int32_t destCapacity, 
+             int32_t *pDestLength,
+             const wchar_t *src,
+             int32_t srcLength,
+             UErrorCode *pErrorCode)
+{
+
+    /* args check: a NULL or already-failed pErrorCode returns NULL without touching anything; bad arguments set U_ILLEGAL_ARGUMENT_ERROR. srcLength==-1 means src is NUL-terminated */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)){
+        return NULL;
+    }
+
+    if( (src==NULL && srcLength!=0) || srcLength < -1 ||
+        (destCapacity<0) || (dest == NULL && destCapacity > 0)
+    ) {
+        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return NULL;
+    }
+
+#ifdef U_WCHAR_IS_UTF16
+    /* wchar_t is UTF-16: just do a memcpy; nothing is copied when src does not fit, but *pDestLength still reports srcLength */
+    if(srcLength == -1){
+        srcLength = u_strlen((const UChar *)src);
+    }
+    if(0 < srcLength && srcLength <= destCapacity){
+        u_memcpy(dest, (const UChar *)src, srcLength);
+    }
+    if(pDestLength){
+       *pDestLength = srcLength;
+    }
+
+    u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
+
+    return dest;
+
+#elif defined U_WCHAR_IS_UTF32
+    
+    return u_strFromUTF32(dest, destCapacity, pDestLength,
+                          (UChar32*)src, srcLength, pErrorCode);
+
+#else
+
+    return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);  
+
+#endif
+
+}
+
+#endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
author	Sergey Uzhakov <uzhastik@gmail.com>	2022-06-22 21:23:19 +0300
committer	Sergey Uzhakov <uzhastik@gmail.com>	2022-06-22 21:23:19 +0300
commit	b86c509aaa5b19d5659a4548395a47c9e87826e9 (patch)
tree	98025d7bd574b788f6679b24f91711ea86bf07ae /contrib/libs/icu/common/ustr_wcs.cpp
parent	f55ada30d924b55d15fad9001944df1323a9598a (diff)
download	ydb-b86c509aaa5b19d5659a4548395a47c9e87826e9.tar.gz