diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
commit | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch) | |
tree | 2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/common/ucnv.cpp | |
parent | 60040c91ffe701a84689b2c6310ff845e65cff42 (diff) | |
download | ydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/common/ucnv.cpp')
-rw-r--r-- | contrib/libs/icu/common/ucnv.cpp | 5814 |
1 files changed, 2907 insertions, 2907 deletions
diff --git a/contrib/libs/icu/common/ucnv.cpp b/contrib/libs/icu/common/ucnv.cpp index 5dcf35e043..06e2a272cb 100644 --- a/contrib/libs/icu/common/ucnv.cpp +++ b/contrib/libs/icu/common/ucnv.cpp @@ -1,2910 +1,2910 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* -* Copyright (C) 1998-2016, International Business Machines -* Corporation and others. All Rights Reserved. -* -****************************************************************************** -* -* ucnv.c: -* Implements APIs for the ICU's codeset conversion library; -* mostly calls through internal functions; -* created by Bertrand A. Damiba -* -* Modification History: -* -* Date Name Description -* 04/04/99 helena Fixed internal header inclusion. -* 05/09/00 helena Added implementation to handle fallback mappings. -* 06/20/2000 helena OS/400 port changes; mostly typecast. -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION - -#include <memory> - -#include "unicode/ustring.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" -#include "unicode/uset.h" -#include "unicode/utf.h" -#include "unicode/utf16.h" -#include "putilimp.h" -#include "cmemory.h" -#include "cstring.h" -#include "uassert.h" -#include "utracimp.h" -#include "ustr_imp.h" -#include "ucnv_imp.h" -#include "ucnv_cnv.h" -#include "ucnv_bld.h" - -/* size of intermediate and preflighting buffers in ucnv_convert() */ -#define CHUNK_SIZE 1024 - -typedef struct UAmbiguousConverter { - const char *name; - const UChar variant5c; -} UAmbiguousConverter; - -static const UAmbiguousConverter ambiguousConverters[]={ - { "ibm-897_P100-1995", 0xa5 }, - { "ibm-942_P120-1999", 0xa5 }, - { "ibm-943_P130-1999", 0xa5 }, - { "ibm-946_P100-1995", 0xa5 }, - { "ibm-33722_P120-1999", 0xa5 }, - { "ibm-1041_P100-1995", 0xa5 }, - /*{ "ibm-54191_P100-2006", 0xa5 },*/ - /*{ "ibm-62383_P100-2007", 0xa5 
},*/ - /*{ "ibm-891_P100-1995", 0x20a9 },*/ - { "ibm-944_P100-1995", 0x20a9 }, - { "ibm-949_P110-1999", 0x20a9 }, - { "ibm-1363_P110-1997", 0x20a9 }, - { "ISO_2022,locale=ko,version=0", 0x20a9 }, - { "ibm-1088_P100-1995", 0x20a9 } -}; - -/*Calls through createConverter */ -U_CAPI UConverter* U_EXPORT2 -ucnv_open (const char *name, - UErrorCode * err) -{ - UConverter *r; - - if (err == NULL || U_FAILURE (*err)) { - return NULL; - } - - r = ucnv_createConverter(NULL, name, err); - return r; -} - +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1998-2016, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* +* ucnv.c: +* Implements APIs for the ICU's codeset conversion library; +* mostly calls through internal functions; +* created by Bertrand A. Damiba +* +* Modification History: +* +* Date Name Description +* 04/04/99 helena Fixed internal header inclusion. +* 05/09/00 helena Added implementation to handle fallback mappings. +* 06/20/2000 helena OS/400 port changes; mostly typecast. 
+*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include <memory> + +#include "unicode/ustring.h" +#include "unicode/ucnv.h" +#include "unicode/ucnv_err.h" +#include "unicode/uset.h" +#include "unicode/utf.h" +#include "unicode/utf16.h" +#include "putilimp.h" +#include "cmemory.h" +#include "cstring.h" +#include "uassert.h" +#include "utracimp.h" +#include "ustr_imp.h" +#include "ucnv_imp.h" +#include "ucnv_cnv.h" +#include "ucnv_bld.h" + +/* size of intermediate and preflighting buffers in ucnv_convert() */ +#define CHUNK_SIZE 1024 + +typedef struct UAmbiguousConverter { + const char *name; + const UChar variant5c; +} UAmbiguousConverter; + +static const UAmbiguousConverter ambiguousConverters[]={ + { "ibm-897_P100-1995", 0xa5 }, + { "ibm-942_P120-1999", 0xa5 }, + { "ibm-943_P130-1999", 0xa5 }, + { "ibm-946_P100-1995", 0xa5 }, + { "ibm-33722_P120-1999", 0xa5 }, + { "ibm-1041_P100-1995", 0xa5 }, + /*{ "ibm-54191_P100-2006", 0xa5 },*/ + /*{ "ibm-62383_P100-2007", 0xa5 },*/ + /*{ "ibm-891_P100-1995", 0x20a9 },*/ + { "ibm-944_P100-1995", 0x20a9 }, + { "ibm-949_P110-1999", 0x20a9 }, + { "ibm-1363_P110-1997", 0x20a9 }, + { "ISO_2022,locale=ko,version=0", 0x20a9 }, + { "ibm-1088_P100-1995", 0x20a9 } +}; + +/*Calls through createConverter */ U_CAPI UConverter* U_EXPORT2 -ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) -{ - return ucnv_createConverterFromPackage(packageName, converterName, err); -} - -/*Extracts the UChar* to a char* and calls through createConverter */ -U_CAPI UConverter* U_EXPORT2 -ucnv_openU (const UChar * name, - UErrorCode * err) -{ - char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - - if (err == NULL || U_FAILURE(*err)) - return NULL; - if (name == NULL) - return ucnv_open (NULL, err); - if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - return ucnv_open(u_austrcpy(asciiName, name), err); -} - -/* Copy the string that is 
represented by the UConverterPlatform enum - * @param platformString An output buffer - * @param platform An enum representing a platform - * @return the length of the copied string. - */ -static int32_t -ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) -{ - switch (pltfrm) - { - case UCNV_IBM: - uprv_strcpy(platformString, "ibm-"); - return 4; - case UCNV_UNKNOWN: - break; - } - - /* default to empty string */ - *platformString = 0; - return 0; -} - -/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls - *through createConverter*/ -U_CAPI UConverter* U_EXPORT2 -ucnv_openCCSID (int32_t codepage, - UConverterPlatform platform, - UErrorCode * err) -{ - char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; - int32_t myNameLen; - - if (err == NULL || U_FAILURE (*err)) - return NULL; - - /* ucnv_copyPlatformString could return "ibm-" or "cp" */ - myNameLen = ucnv_copyPlatformString(myName, platform); - T_CString_integerToString(myName + myNameLen, codepage, 10); - - return ucnv_createConverter(NULL, myName, err); -} - -/* Creating a temporary stack-based object that can be used in one thread, -and created from a converter that is shared across threads. -*/ - -U_CAPI UConverter* U_EXPORT2 -ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) -{ - UConverter *localConverter, *allocatedConverter; - int32_t stackBufferSize; - int32_t bufferSizeNeeded; - UErrorCode cbErr; - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); - - if (status == NULL || U_FAILURE(*status)){ - UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); - return NULL; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", - ucnv_getName(cnv, status), cnv, stackBuffer); - - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function for sizing */ - bufferSizeNeeded = 0; - cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); - if (U_FAILURE(*status)) { - UTRACE_EXIT_STATUS(*status); - return NULL; - } - } - else - { - /* inherent sizing */ - bufferSizeNeeded = sizeof(UConverter); - } - - if (pBufferSize == NULL) { - stackBufferSize = 1; - pBufferSize = &stackBufferSize; - } else { - stackBufferSize = *pBufferSize; - if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - UTRACE_EXIT_VALUE(bufferSizeNeeded); - return NULL; - } - } - - /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. - * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed. 
- */ - if (stackBuffer) { - uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer); - uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1); - ptrdiff_t pointerAdjustment = aligned_p - p; - if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) { - stackBuffer = reinterpret_cast<void *>(aligned_p); - stackBufferSize -= static_cast<int32_t>(pointerAdjustment); - } else { - /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ - stackBufferSize = 1; - } - } - - /* Now, see if we must allocate any memory */ - if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) - { - /* allocate one here...*/ - localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); - - if(localConverter == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - UTRACE_EXIT_STATUS(*status); - return NULL; - } - *status = U_SAFECLONE_ALLOCATED_WARNING; - - /* record the fact that memory was allocated */ - *pBufferSize = bufferSizeNeeded; - } else { - /* just use the stack buffer */ - localConverter = (UConverter*) stackBuffer; - allocatedConverter = NULL; - } - - uprv_memset(localConverter, 0, bufferSizeNeeded); - - /* Copy initial state */ - uprv_memcpy(localConverter, cnv, sizeof(UConverter)); - localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; - - /* copy the substitution string */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - localConverter->subChars = (uint8_t *)localConverter->subUChars; - } else { - localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (localConverter->subChars == NULL) { - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - - /* now either call the safeclone fcn or not */ - if (cnv->sharedData->impl->safeClone != NULL) { - /* call the custom safeClone function */ - 
localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); - } - - if(localConverter==NULL || U_FAILURE(*status)) { - if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { - uprv_free(allocatedConverter->subChars); - } - uprv_free(allocatedConverter); - UTRACE_EXIT_STATUS(*status); - return NULL; - } - - /* increment refcount of shared data if needed */ - if (cnv->sharedData->isReferenceCounted) { - ucnv_incrementRefCount(cnv->sharedData); - } - - if(localConverter == (UConverter*)stackBuffer) { - /* we're using user provided data - set to not destroy */ - localConverter->isCopyLocal = TRUE; - } - - /* allow callback functions to handle any memory allocation */ - toUArgs.converter = fromUArgs.converter = localConverter; - cbErr = U_ZERO_ERROR; - cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); - cbErr = U_ZERO_ERROR; - cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); - - UTRACE_EXIT_PTR_STATUS(localConverter, *status); - return localConverter; -} - - - -/*Decreases the reference counter in the shared immutable section of the object - *and frees the mutable part*/ - -U_CAPI void U_EXPORT2 -ucnv_close (UConverter * converter) -{ - UErrorCode errorCode = U_ZERO_ERROR; - - UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); - - if (converter == NULL) - { - UTRACE_EXIT(); - return; - } - - UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", - ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); - - /* In order to speed up the close, only call the callbacks when they have been changed. - This performance check will only work when the callbacks are set within a shared library - or from user code that statically links this code. 
*/ - /* first, notify the callback functions that the converter is closed */ - if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); - } - if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); - } - - if (converter->sharedData->impl->close != NULL) { - converter->sharedData->impl->close(converter); - } - - if (converter->subChars != (uint8_t *)converter->subUChars) { - uprv_free(converter->subChars); - } - - if (converter->sharedData->isReferenceCounted) { - ucnv_unloadSharedDataIfReady(converter->sharedData); - } - - if(!converter->isCopyLocal){ - uprv_free(converter); - } - - UTRACE_EXIT(); -} - -/*returns a single Name from the list, will return NULL if out of bounds - */ -U_CAPI const char* U_EXPORT2 -ucnv_getAvailableName (int32_t n) -{ - if (0 <= n && n <= 0xffff) { - UErrorCode err = U_ZERO_ERROR; - const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); - if (U_SUCCESS(err)) { - return name; - } - } - return NULL; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_countAvailable () -{ - UErrorCode err = U_ZERO_ERROR; - return ucnv_bld_countAvailableConverters(&err); -} - -U_CAPI void U_EXPORT2 -ucnv_getSubstChars (const UConverter * converter, - char *mySubChar, - int8_t * len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - if (converter->subCharLen <= 0) { - /* Unicode string or empty string from 
ucnv_setSubstString(). */ - *len = 0; - return; - } - - if (*len < converter->subCharLen) /*not enough space in subChars */ - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - - uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ - *len = converter->subCharLen; /*store # of bytes copied to buffer */ -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstChars (UConverter * converter, - const char *mySubChar, - int8_t len, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - - /*Makes sure that the subChar is within the codepages char length boundaries */ - if ((len > converter->sharedData->staticData->maxBytesPerChar) - || (len < converter->sharedData->staticData->minBytesPerChar)) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ - converter->subCharLen = len; /*sets the new len */ - - /* - * There is currently (2001Feb) no separate API to set/get subChar1. - * In order to always have subChar written after it is explicitly set, - * we set subChar1 to 0. - */ - converter->subChar1 = 0; - - return; -} - -U_CAPI void U_EXPORT2 -ucnv_setSubstString(UConverter *cnv, - const UChar *s, - int32_t length, - UErrorCode *err) { - alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE]; - char chars[UCNV_ERROR_BUFFER_LENGTH]; - - UConverter *clone; - uint8_t *subChars; - int32_t cloneSize, length8; - - /* Let the following functions check all arguments. 
*/ - cloneSize = sizeof(cloneBuffer); - clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); - ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); - length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); - ucnv_close(clone); - if (U_FAILURE(*err)) { - return; - } - - if (cnv->sharedData->impl->writeSub == NULL -#if !UCONFIG_NO_LEGACY_CONVERSION - || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && - ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) -#endif - ) { - /* The converter is not stateful. Store the charset bytes as a fixed string. */ - subChars = (uint8_t *)chars; - } else { - /* - * The converter has a non-default writeSub() function, indicating - * that it is stateful. - * Store the Unicode string for on-the-fly conversion for correct - * state handling. - */ - if (length > UCNV_ERROR_BUFFER_LENGTH) { - /* - * Should not occur. The converter should output at least one byte - * per UChar, which means that ucnv_fromUChars() should catch all - * overflows. - */ - *err = U_BUFFER_OVERFLOW_ERROR; - return; - } - subChars = (uint8_t *)s; - if (length < 0) { - length = u_strlen(s); - } - length8 = length * U_SIZEOF_UCHAR; - } - - /* - * For storing the substitution string, select either the small buffer inside - * UConverter or allocate a subChars buffer. - */ - if (length8 > UCNV_MAX_SUBCHAR_LEN) { - /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ - if (cnv->subChars == (uint8_t *)cnv->subUChars) { - /* Allocate a new buffer for the string. */ - cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - if (cnv->subChars == NULL) { - cnv->subChars = (uint8_t *)cnv->subUChars; - *err = U_MEMORY_ALLOCATION_ERROR; - return; - } - uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); - } - } - - /* Copy the substitution string into the UConverter or its subChars buffer. 
*/ - if (length8 == 0) { - cnv->subCharLen = 0; - } else { - uprv_memcpy(cnv->subChars, subChars, length8); - if (subChars == (uint8_t *)chars) { - cnv->subCharLen = (int8_t)length8; - } else /* subChars == s */ { - cnv->subCharLen = (int8_t)-length; - } - } - - /* See comment in ucnv_setSubstChars(). */ - cnv->subChar1 = 0; -} - -/*resets the internal states of a converter - *goal : have the same behaviour than a freshly created converter - */ -static void _reset(UConverter *converter, UConverterResetChoice choice, - UBool callCallback) { - if(converter == NULL) { - return; - } - - if(callCallback) { - /* first, notify the callback functions that the converter is reset */ - UErrorCode errorCode; - - if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { - UConverterToUnicodeArgs toUArgs = { - sizeof(UConverterToUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - toUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); - } - if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { - UConverterFromUnicodeArgs fromUArgs = { - sizeof(UConverterFromUnicodeArgs), - TRUE, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL - }; - fromUArgs.converter = converter; - errorCode = U_ZERO_ERROR; - converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); - } - } - - /* now reset the converter itself */ - if(choice<=UCNV_RESET_TO_UNICODE) { - converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; - converter->mode = 0; - converter->toULength = 0; - converter->invalidCharLength = converter->UCharErrorBufferLength = 0; - converter->preToULength = 0; - } - if(choice!=UCNV_RESET_TO_UNICODE) { - converter->fromUnicodeStatus = 0; - converter->fromUChar32 = 0; - converter->invalidUCharLength = 
converter->charErrorBufferLength = 0; - converter->preFromUFirstCP = U_SENTINEL; - converter->preFromULength = 0; - } - - if (converter->sharedData->impl->reset != NULL) { - /* call the custom reset function */ - converter->sharedData->impl->reset(converter, choice); - } -} - -U_CAPI void U_EXPORT2 -ucnv_reset(UConverter *converter) -{ - _reset(converter, UCNV_RESET_BOTH, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetToUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); -} - -U_CAPI void U_EXPORT2 -ucnv_resetFromUnicode(UConverter *converter) -{ - _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); -} - -U_CAPI int8_t U_EXPORT2 -ucnv_getMaxCharSize (const UConverter * converter) -{ - return converter->maxBytesPerUChar; -} - - -U_CAPI int8_t U_EXPORT2 -ucnv_getMinCharSize (const UConverter * converter) -{ - return converter->sharedData->staticData->minBytesPerChar; -} - -U_CAPI const char* U_EXPORT2 -ucnv_getName (const UConverter * converter, UErrorCode * err) +ucnv_open (const char *name, + UErrorCode * err) +{ + UConverter *r; + + if (err == NULL || U_FAILURE (*err)) { + return NULL; + } + + r = ucnv_createConverter(NULL, name, err); + return r; +} + +U_CAPI UConverter* U_EXPORT2 +ucnv_openPackage (const char *packageName, const char *converterName, UErrorCode * err) +{ + return ucnv_createConverterFromPackage(packageName, converterName, err); +} + +/*Extracts the UChar* to a char* and calls through createConverter */ +U_CAPI UConverter* U_EXPORT2 +ucnv_openU (const UChar * name, + UErrorCode * err) +{ + char asciiName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + if (err == NULL || U_FAILURE(*err)) + return NULL; + if (name == NULL) + return ucnv_open (NULL, err); + if (u_strlen(name) >= UCNV_MAX_CONVERTER_NAME_LENGTH) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + return ucnv_open(u_austrcpy(asciiName, name), err); +} + +/* Copy the string that is represented by the UConverterPlatform enum + * @param platformString An output 
buffer + * @param platform An enum representing a platform + * @return the length of the copied string. + */ +static int32_t +ucnv_copyPlatformString(char *platformString, UConverterPlatform pltfrm) +{ + switch (pltfrm) + { + case UCNV_IBM: + uprv_strcpy(platformString, "ibm-"); + return 4; + case UCNV_UNKNOWN: + break; + } + + /* default to empty string */ + *platformString = 0; + return 0; +} + +/*Assumes a $platform-#codepage.$CONVERTER_FILE_EXTENSION scheme and calls + *through createConverter*/ +U_CAPI UConverter* U_EXPORT2 +ucnv_openCCSID (int32_t codepage, + UConverterPlatform platform, + UErrorCode * err) +{ + char myName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + int32_t myNameLen; + + if (err == NULL || U_FAILURE (*err)) + return NULL; + + /* ucnv_copyPlatformString could return "ibm-" or "cp" */ + myNameLen = ucnv_copyPlatformString(myName, platform); + T_CString_integerToString(myName + myNameLen, codepage, 10); + + return ucnv_createConverter(NULL, myName, err); +} + +/* Creating a temporary stack-based object that can be used in one thread, +and created from a converter that is shared across threads. +*/ + +U_CAPI UConverter* U_EXPORT2 +ucnv_safeClone(const UConverter* cnv, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status) +{ + UConverter *localConverter, *allocatedConverter; + int32_t stackBufferSize; + int32_t bufferSizeNeeded; + UErrorCode cbErr; + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLONE); + + if (status == NULL || U_FAILURE(*status)){ + UTRACE_EXIT_STATUS(status? 
*status: U_ILLEGAL_ARGUMENT_ERROR); + return NULL; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "clone converter %s at %p into stackBuffer %p", + ucnv_getName(cnv, status), cnv, stackBuffer); + + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function for sizing */ + bufferSizeNeeded = 0; + cnv->sharedData->impl->safeClone(cnv, NULL, &bufferSizeNeeded, status); + if (U_FAILURE(*status)) { + UTRACE_EXIT_STATUS(*status); + return NULL; + } + } + else + { + /* inherent sizing */ + bufferSizeNeeded = sizeof(UConverter); + } + + if (pBufferSize == NULL) { + stackBufferSize = 1; + pBufferSize = &stackBufferSize; + } else { + stackBufferSize = *pBufferSize; + if (stackBufferSize <= 0){ /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + UTRACE_EXIT_VALUE(bufferSizeNeeded); + return NULL; + } + } + + /* Adjust (if necessary) the stackBuffer pointer to be aligned correctly for a UConverter. + * TODO(Jira ICU-20736) Redo this using std::align() once g++4.9 compatibility is no longer needed. 
+ */ + if (stackBuffer) { + uintptr_t p = reinterpret_cast<uintptr_t>(stackBuffer); + uintptr_t aligned_p = (p + alignof(UConverter) - 1) & ~(alignof(UConverter) - 1); + ptrdiff_t pointerAdjustment = aligned_p - p; + if (bufferSizeNeeded + pointerAdjustment <= stackBufferSize) { + stackBuffer = reinterpret_cast<void *>(aligned_p); + stackBufferSize -= static_cast<int32_t>(pointerAdjustment); + } else { + /* prevent using the stack buffer but keep the size > 0 so that we do not just preflight */ + stackBufferSize = 1; + } + } + + /* Now, see if we must allocate any memory */ + if (stackBufferSize < bufferSizeNeeded || stackBuffer == NULL) + { + /* allocate one here...*/ + localConverter = allocatedConverter = (UConverter *) uprv_malloc (bufferSizeNeeded); + + if(localConverter == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + UTRACE_EXIT_STATUS(*status); + return NULL; + } + *status = U_SAFECLONE_ALLOCATED_WARNING; + + /* record the fact that memory was allocated */ + *pBufferSize = bufferSizeNeeded; + } else { + /* just use the stack buffer */ + localConverter = (UConverter*) stackBuffer; + allocatedConverter = NULL; + } + + uprv_memset(localConverter, 0, bufferSizeNeeded); + + /* Copy initial state */ + uprv_memcpy(localConverter, cnv, sizeof(UConverter)); + localConverter->isCopyLocal = localConverter->isExtraLocal = FALSE; + + /* copy the substitution string */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + localConverter->subChars = (uint8_t *)localConverter->subUChars; + } else { + localConverter->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (localConverter->subChars == NULL) { + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + uprv_memcpy(localConverter->subChars, cnv->subChars, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + + /* now either call the safeclone fcn or not */ + if (cnv->sharedData->impl->safeClone != NULL) { + /* call the custom safeClone function */ + 
localConverter = cnv->sharedData->impl->safeClone(cnv, localConverter, pBufferSize, status); + } + + if(localConverter==NULL || U_FAILURE(*status)) { + if (allocatedConverter != NULL && allocatedConverter->subChars != (uint8_t *)allocatedConverter->subUChars) { + uprv_free(allocatedConverter->subChars); + } + uprv_free(allocatedConverter); + UTRACE_EXIT_STATUS(*status); + return NULL; + } + + /* increment refcount of shared data if needed */ + if (cnv->sharedData->isReferenceCounted) { + ucnv_incrementRefCount(cnv->sharedData); + } + + if(localConverter == (UConverter*)stackBuffer) { + /* we're using user provided data - set to not destroy */ + localConverter->isCopyLocal = TRUE; + } + + /* allow callback functions to handle any memory allocation */ + toUArgs.converter = fromUArgs.converter = localConverter; + cbErr = U_ZERO_ERROR; + cnv->fromCharErrorBehaviour(cnv->toUContext, &toUArgs, NULL, 0, UCNV_CLONE, &cbErr); + cbErr = U_ZERO_ERROR; + cnv->fromUCharErrorBehaviour(cnv->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLONE, &cbErr); + + UTRACE_EXIT_PTR_STATUS(localConverter, *status); + return localConverter; +} + + + +/*Decreases the reference counter in the shared immutable section of the object + *and frees the mutable part*/ + +U_CAPI void U_EXPORT2 +ucnv_close (UConverter * converter) +{ + UErrorCode errorCode = U_ZERO_ERROR; + + UTRACE_ENTRY_OC(UTRACE_UCNV_CLOSE); + + if (converter == NULL) + { + UTRACE_EXIT(); + return; + } + + UTRACE_DATA3(UTRACE_OPEN_CLOSE, "close converter %s at %p, isCopyLocal=%b", + ucnv_getName(converter, &errorCode), converter, converter->isCopyLocal); + + /* In order to speed up the close, only call the callbacks when they have been changed. + This performance check will only work when the callbacks are set within a shared library + or from user code that statically links this code. 
*/ + /* first, notify the callback functions that the converter is closed */ + if (converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_CLOSE, &errorCode); + } + if (converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_CLOSE, &errorCode); + } + + if (converter->sharedData->impl->close != NULL) { + converter->sharedData->impl->close(converter); + } + + if (converter->subChars != (uint8_t *)converter->subUChars) { + uprv_free(converter->subChars); + } + + if (converter->sharedData->isReferenceCounted) { + ucnv_unloadSharedDataIfReady(converter->sharedData); + } + + if(!converter->isCopyLocal){ + uprv_free(converter); + } + + UTRACE_EXIT(); +} + +/*returns a single Name from the list, will return NULL if out of bounds + */ +U_CAPI const char* U_EXPORT2 +ucnv_getAvailableName (int32_t n) +{ + if (0 <= n && n <= 0xffff) { + UErrorCode err = U_ZERO_ERROR; + const char *name = ucnv_bld_getAvailableConverter((uint16_t)n, &err); + if (U_SUCCESS(err)) { + return name; + } + } + return NULL; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_countAvailable () +{ + UErrorCode err = U_ZERO_ERROR; + return ucnv_bld_countAvailableConverters(&err); +} + +U_CAPI void U_EXPORT2 +ucnv_getSubstChars (const UConverter * converter, + char *mySubChar, + int8_t * len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + if (converter->subCharLen <= 0) { + /* Unicode string or empty string from 
ucnv_setSubstString(). */ + *len = 0; + return; + } + + if (*len < converter->subCharLen) /*not enough space in subChars */ + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + + uprv_memcpy (mySubChar, converter->subChars, converter->subCharLen); /*fills in the subchars */ + *len = converter->subCharLen; /*store # of bytes copied to buffer */ +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstChars (UConverter * converter, + const char *mySubChar, + int8_t len, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + + /*Makes sure that the subChar is within the codepages char length boundaries */ + if ((len > converter->sharedData->staticData->maxBytesPerChar) + || (len < converter->sharedData->staticData->minBytesPerChar)) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + uprv_memcpy (converter->subChars, mySubChar, len); /*copies the subchars */ + converter->subCharLen = len; /*sets the new len */ + + /* + * There is currently (2001Feb) no separate API to set/get subChar1. + * In order to always have subChar written after it is explicitly set, + * we set subChar1 to 0. + */ + converter->subChar1 = 0; + + return; +} + +U_CAPI void U_EXPORT2 +ucnv_setSubstString(UConverter *cnv, + const UChar *s, + int32_t length, + UErrorCode *err) { + alignas(UConverter) char cloneBuffer[U_CNV_SAFECLONE_BUFFERSIZE]; + char chars[UCNV_ERROR_BUFFER_LENGTH]; + + UConverter *clone; + uint8_t *subChars; + int32_t cloneSize, length8; + + /* Let the following functions check all arguments. 
*/ + cloneSize = sizeof(cloneBuffer); + clone = ucnv_safeClone(cnv, cloneBuffer, &cloneSize, err); + ucnv_setFromUCallBack(clone, UCNV_FROM_U_CALLBACK_STOP, NULL, NULL, NULL, err); + length8 = ucnv_fromUChars(clone, chars, (int32_t)sizeof(chars), s, length, err); + ucnv_close(clone); + if (U_FAILURE(*err)) { + return; + } + + if (cnv->sharedData->impl->writeSub == NULL +#if !UCONFIG_NO_LEGACY_CONVERSION + || (cnv->sharedData->staticData->conversionType == UCNV_MBCS && + ucnv_MBCSGetType(cnv) != UCNV_EBCDIC_STATEFUL) +#endif + ) { + /* The converter is not stateful. Store the charset bytes as a fixed string. */ + subChars = (uint8_t *)chars; + } else { + /* + * The converter has a non-default writeSub() function, indicating + * that it is stateful. + * Store the Unicode string for on-the-fly conversion for correct + * state handling. + */ + if (length > UCNV_ERROR_BUFFER_LENGTH) { + /* + * Should not occur. The converter should output at least one byte + * per UChar, which means that ucnv_fromUChars() should catch all + * overflows. + */ + *err = U_BUFFER_OVERFLOW_ERROR; + return; + } + subChars = (uint8_t *)s; + if (length < 0) { + length = u_strlen(s); + } + length8 = length * U_SIZEOF_UCHAR; + } + + /* + * For storing the substitution string, select either the small buffer inside + * UConverter or allocate a subChars buffer. + */ + if (length8 > UCNV_MAX_SUBCHAR_LEN) { + /* Use a separate buffer for the string. Outside UConverter to not make it too large. */ + if (cnv->subChars == (uint8_t *)cnv->subUChars) { + /* Allocate a new buffer for the string. */ + cnv->subChars = (uint8_t *)uprv_malloc(UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + if (cnv->subChars == NULL) { + cnv->subChars = (uint8_t *)cnv->subUChars; + *err = U_MEMORY_ALLOCATION_ERROR; + return; + } + uprv_memset(cnv->subChars, 0, UCNV_ERROR_BUFFER_LENGTH * U_SIZEOF_UCHAR); + } + } + + /* Copy the substitution string into the UConverter or its subChars buffer. 
*/ + if (length8 == 0) { + cnv->subCharLen = 0; + } else { + uprv_memcpy(cnv->subChars, subChars, length8); + if (subChars == (uint8_t *)chars) { + cnv->subCharLen = (int8_t)length8; + } else /* subChars == s */ { + cnv->subCharLen = (int8_t)-length; + } + } + + /* See comment in ucnv_setSubstChars(). */ + cnv->subChar1 = 0; +} + +/*resets the internal states of a converter + *goal : have the same behaviour than a freshly created converter + */ +static void _reset(UConverter *converter, UConverterResetChoice choice, + UBool callCallback) { + if(converter == NULL) { + return; + } + + if(callCallback) { + /* first, notify the callback functions that the converter is reset */ + UErrorCode errorCode; + + if(choice<=UCNV_RESET_TO_UNICODE && converter->fromCharErrorBehaviour != UCNV_TO_U_DEFAULT_CALLBACK) { + UConverterToUnicodeArgs toUArgs = { + sizeof(UConverterToUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + toUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromCharErrorBehaviour(converter->toUContext, &toUArgs, NULL, 0, UCNV_RESET, &errorCode); + } + if(choice!=UCNV_RESET_TO_UNICODE && converter->fromUCharErrorBehaviour != UCNV_FROM_U_DEFAULT_CALLBACK) { + UConverterFromUnicodeArgs fromUArgs = { + sizeof(UConverterFromUnicodeArgs), + TRUE, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL + }; + fromUArgs.converter = converter; + errorCode = U_ZERO_ERROR; + converter->fromUCharErrorBehaviour(converter->fromUContext, &fromUArgs, NULL, 0, 0, UCNV_RESET, &errorCode); + } + } + + /* now reset the converter itself */ + if(choice<=UCNV_RESET_TO_UNICODE) { + converter->toUnicodeStatus = converter->sharedData->toUnicodeStatus; + converter->mode = 0; + converter->toULength = 0; + converter->invalidCharLength = converter->UCharErrorBufferLength = 0; + converter->preToULength = 0; + } + if(choice!=UCNV_RESET_TO_UNICODE) { + converter->fromUnicodeStatus = 0; + converter->fromUChar32 = 0; + converter->invalidUCharLength = 
converter->charErrorBufferLength = 0; + converter->preFromUFirstCP = U_SENTINEL; + converter->preFromULength = 0; + } + + if (converter->sharedData->impl->reset != NULL) { + /* call the custom reset function */ + converter->sharedData->impl->reset(converter, choice); + } +} + +U_CAPI void U_EXPORT2 +ucnv_reset(UConverter *converter) +{ + _reset(converter, UCNV_RESET_BOTH, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetToUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_TO_UNICODE, TRUE); +} + +U_CAPI void U_EXPORT2 +ucnv_resetFromUnicode(UConverter *converter) +{ + _reset(converter, UCNV_RESET_FROM_UNICODE, TRUE); +} + +U_CAPI int8_t U_EXPORT2 +ucnv_getMaxCharSize (const UConverter * converter) +{ + return converter->maxBytesPerUChar; +} + + +U_CAPI int8_t U_EXPORT2 +ucnv_getMinCharSize (const UConverter * converter) +{ + return converter->sharedData->staticData->minBytesPerChar; +} + +U_CAPI const char* U_EXPORT2 +ucnv_getName (const UConverter * converter, UErrorCode * err) + +{ + if (U_FAILURE (*err)) + return NULL; + if(converter->sharedData->impl->getName){ + const char* temp= converter->sharedData->impl->getName(converter); + if(temp) + return temp; + } + return converter->sharedData->staticData->name; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_getCCSID(const UConverter * converter, + UErrorCode * err) +{ + int32_t ccsid; + if (U_FAILURE (*err)) + return -1; + + ccsid = converter->sharedData->staticData->codepage; + if (ccsid == 0) { + /* Rare case. This is for cases like gb18030, + which doesn't have an IBM canonical name, but does have an IBM alias. 
*/ + const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); + if (U_SUCCESS(*err) && standardName) { + const char *ccsidStr = uprv_strchr(standardName, '-'); + if (ccsidStr) { + ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ + } + } + } + return ccsid; +} + + +U_CAPI UConverterPlatform U_EXPORT2 +ucnv_getPlatform (const UConverter * converter, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return UCNV_UNKNOWN; + + return (UConverterPlatform)converter->sharedData->staticData->platform; +} + +U_CAPI void U_EXPORT2 + ucnv_getToUCallBack (const UConverter * converter, + UConverterToUCallback *action, + const void **context) +{ + *action = converter->fromCharErrorBehaviour; + *context = converter->toUContext; +} + +U_CAPI void U_EXPORT2 + ucnv_getFromUCallBack (const UConverter * converter, + UConverterFromUCallback *action, + const void **context) +{ + *action = converter->fromUCharErrorBehaviour; + *context = converter->fromUContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setToUCallBack (UConverter * converter, + UConverterToUCallback newAction, + const void* newContext, + UConverterToUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromCharErrorBehaviour; + converter->fromCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->toUContext; + converter->toUContext = newContext; +} + +U_CAPI void U_EXPORT2 +ucnv_setFromUCallBack (UConverter * converter, + UConverterFromUCallback newAction, + const void* newContext, + UConverterFromUCallback *oldAction, + const void** oldContext, + UErrorCode * err) +{ + if (U_FAILURE (*err)) + return; + if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; + converter->fromUCharErrorBehaviour = newAction; + if (oldContext) *oldContext = converter->fromUContext; + converter->fromUContext = newContext; +} + +static void +_updateOffsets(int32_t *offsets, int32_t length, + 
/*
 * Adjusts the first `length` entries of an offsets array after a conversion step.
 *
 * - sourceIndex>=0: each non-negative offset is shifted by
 *   (sourceIndex - errorInputLength); negative offsets are left alone.
 * - sourceIndex<0 (the conversion function does not handle offsets), or the
 *   shift is negative (the error input started in a previous buffer):
 *   every entry is set to -1.
 * - a shift of 0 leaves the array untouched.
 */
static void
_updateOffsets(int32_t *offsets, int32_t length,
               int32_t sourceIndex, int32_t errorInputLength) {
    int32_t shift = (sourceIndex >= 0) ? (sourceIndex - errorInputLength) : -1;
    int32_t i;

    if (shift == 0) {
        /* most common case, nothing to do */
        return;
    }

    if (shift < 0) {
        /* no usable source index: invalidate every offset */
        for (i = 0; i < length; ++i) {
            offsets[i] = -1;
        }
        return;
    }

    /* shift > 0: move each valid offset, keep "unknown" (<0) entries as they are */
    for (i = 0; i < length; ++i) {
        if (offsets[i] >= 0) {
            offsets[i] += shift;
        }
    }
}
/*
 * Core from-Unicode conversion driver.
 *
 * Repeatedly calls the converter implementation's fromUnicode function
 * (the WithOffsets variant if offsets are requested and one exists) on pArgs,
 * and after each call
 * - post-processes the offsets that were written (see _updateOffsets()),
 * - switches to/from the "replay" buffer when the converter stored source
 *   units from a partial m:n match (cnv->preFromULength<0),
 * - injects U_TRUNCATED_CHAR_FOUND if flush is set and a partial code point
 *   (cnv->fromUChar32!=0) remains at the end of the input,
 * - calls the from-Unicode error callback for U_INVALID_CHAR_FOUND,
 *   U_ILLEGAL_CHAR_FOUND and U_TRUNCATED_CHAR_FOUND.
 *
 * Returns when the input is consumed successfully (resetting the from-Unicode
 * state without callback notification if flush is set), or when an error
 * remains that the callback did not or cannot resolve; *err holds that error.
 * pArgs->source/target/offsets are updated to reflect the progress.
 *
 * @param pArgs conversion arguments; modified in place
 * @param err   in/out error code; may already hold a failure on entry,
 *              in which case no conversion is attempted before error handling
 */
static void
_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterFromUnicode fromUnicode;
    UConverter *cnv;
    const UChar *s;
    char *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    UChar replay[UCNV_EXT_MAX_UCHARS];
    const UChar *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        fromUnicode=cnv->sharedData->impl->fromUnicode;
    } else {
        fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets;
        if(fromUnicode==NULL) {
            /* there is no WithOffsets implementation */
            fromUnicode=cnv->sharedData->impl->fromUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preFromULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preFromULength is negative here: its negation is the number of stored UChars */
        uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preFromULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preFromULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            fromUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preFromULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->fromUChar32==0);
        } else {
            /* handle error from ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preFromULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preFromULength;
                    pArgs->flush=FALSE;
                    /* preFromULength is negative: this moves sourceIndex back by the replay length */
                    if((sourceIndex+=cnv->preFromULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preFromULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->fromUChar32!=0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preFromULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            u_memcpy(cnv->preFromU, pArgs->source, length);
                            cnv->preFromULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* callback handling */
            {
                UChar32 codePoint;

                /* get and write the code point */
                codePoint=cnv->fromUChar32;
                errorInputLength=0;
                U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint);
                cnv->invalidUCharLength=(int8_t)errorInputLength;

                /* set the converter state to deal with the next character */
                cnv->fromUChar32=0;

                /* call the callback function */
                cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs,
                    cnv->invalidUCharBuffer, errorInputLength, codePoint,
                    *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL,
                    err);
            }

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}
cnv->fromUChar32=0; + + /* call the callback function */ + cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, + cnv->invalidUCharBuffer, errorInputLength, codePoint, + *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, + err); + } + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the fromUnicode overflow buffer. + * Call this function if(cnv->charErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowFromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + char *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=(char *)cnv->charErrorBuffer; + length=cnv->charErrorBufferLength; + i=0; + while(i<length) { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(i<length); + + cnv->charErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->charErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 +ucnv_fromUnicode(UConverter *cnv, + char **target, const char *targetLimit, + const UChar **source, const UChar *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterFromUnicodeArgs args; + const UChar *s; + char *t; + + /* check parameters */ + if(err==NULL || 
U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be targetLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. 
+ */ + if (sourceLimit<s || targetLimit<t || + ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || + (((const char *)sourceLimit-(const char *)s) & 1) != 0) + { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->charErrorBufferLength>0 && + ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preFromULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _fromUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_toUnicode() --------------------------------------------------------- */ + +static void +_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { + UConverterToUnicode toUnicode; + UConverter *cnv; + const char *s; + UChar *t; + int32_t *offsets; + int32_t sourceIndex; + int32_t errorInputLength; + UBool converterSawEndOfInput, calledCallback; + + /* variables for m:n conversion */ + char replay[UCNV_EXT_MAX_BYTES]; + const char *realSource, *realSourceLimit; + int32_t realSourceIndex; + UBool realFlush; + + cnv=pArgs->converter; + s=pArgs->source; + t=pArgs->target; + offsets=pArgs->offsets; + + /* get the converter implementation function */ + sourceIndex=0; + 
/*
 * Core to-Unicode conversion driver.
 *
 * Repeatedly calls the converter implementation's toUnicode function
 * (the WithOffsets variant if offsets are requested and one exists) on pArgs,
 * and after each call
 * - post-processes the offsets that were written (see _updateOffsets()),
 * - switches to/from the "replay" buffer when the converter stored source
 *   bytes from a partial m:n match (cnv->preToULength<0),
 * - injects U_TRUNCATED_CHAR_FOUND if flush is set and a partial byte
 *   sequence (cnv->toULength>0) remains at the end of the input,
 * - calls the to-Unicode error callback for U_INVALID_CHAR_FOUND,
 *   U_ILLEGAL_CHAR_FOUND, U_TRUNCATED_CHAR_FOUND, U_ILLEGAL_ESCAPE_SEQUENCE
 *   and U_UNSUPPORTED_ESCAPE_SEQUENCE.
 *
 * Returns when the input is consumed successfully (resetting the to-Unicode
 * state without callback notification if flush is set), or when an error
 * remains that the callback did not or cannot resolve; *err holds that error.
 * pArgs->source/target/offsets are updated to reflect the progress.
 *
 * @param pArgs conversion arguments; modified in place
 * @param err   in/out error code; may already hold a failure on entry,
 *              in which case no conversion is attempted before error handling
 */
static void
_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
    UConverterToUnicode toUnicode;
    UConverter *cnv;
    const char *s;
    UChar *t;
    int32_t *offsets;
    int32_t sourceIndex;
    int32_t errorInputLength;
    UBool converterSawEndOfInput, calledCallback;

    /* variables for m:n conversion */
    char replay[UCNV_EXT_MAX_BYTES];
    const char *realSource, *realSourceLimit;
    int32_t realSourceIndex;
    UBool realFlush;

    cnv=pArgs->converter;
    s=pArgs->source;
    t=pArgs->target;
    offsets=pArgs->offsets;

    /* get the converter implementation function */
    sourceIndex=0;
    if(offsets==NULL) {
        toUnicode=cnv->sharedData->impl->toUnicode;
    } else {
        toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets;
        if(toUnicode==NULL) {
            /* there is no WithOffsets implementation */
            toUnicode=cnv->sharedData->impl->toUnicode;
            /* we will write -1 for each offset */
            sourceIndex=-1;
        }
    }

    if(cnv->preToULength>=0) {
        /* normal mode */
        realSource=NULL;

        /* avoid compiler warnings - not otherwise necessary, and the values do not matter */
        realSourceLimit=NULL;
        realFlush=FALSE;
        realSourceIndex=0;
    } else {
        /*
         * Previous m:n conversion stored source units from a partial match
         * and failed to consume all of them.
         * We need to "replay" them from a temporary buffer and convert them first.
         */
        realSource=pArgs->source;
        realSourceLimit=pArgs->sourceLimit;
        realFlush=pArgs->flush;
        realSourceIndex=sourceIndex;

        /* preToULength is negative here: its negation is the number of stored bytes */
        uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
        pArgs->source=replay;
        pArgs->sourceLimit=replay-cnv->preToULength;
        pArgs->flush=FALSE;
        sourceIndex=-1;

        cnv->preToULength=0;
    }

    /*
     * loop for conversion and error handling
     *
     * loop {
     *   convert
     *   loop {
     *     update offsets
     *     handle end of input
     *     handle errors/call callback
     *   }
     * }
     */
    for(;;) {
        if(U_SUCCESS(*err)) {
            /* convert */
            toUnicode(pArgs, err);

            /*
             * set a flag for whether the converter
             * successfully processed the end of the input
             *
             * need not check cnv->preToULength==0 because a replay (<0) will cause
             * s<sourceLimit before converterSawEndOfInput is checked
             */
            converterSawEndOfInput=
                (UBool)(U_SUCCESS(*err) &&
                        pArgs->flush && pArgs->source==pArgs->sourceLimit &&
                        cnv->toULength==0);
        } else {
            /* handle error from getNextUChar() or ucnv_convertEx() */
            converterSawEndOfInput=FALSE;
        }

        /* no callback called yet for this iteration */
        calledCallback=FALSE;

        /* no sourceIndex adjustment for conversion, only for callback output */
        errorInputLength=0;

        /*
         * loop for offsets and error handling
         *
         * iterates at most 3 times:
         * 1. to clean up after the conversion function
         * 2. after the callback
         * 3. after the callback again if there was truncated input
         */
        for(;;) {
            /* update offsets if we write any */
            if(offsets!=NULL) {
                int32_t length=(int32_t)(pArgs->target-t);
                if(length>0) {
                    _updateOffsets(offsets, length, sourceIndex, errorInputLength);

                    /*
                     * if a converter handles offsets and updates the offsets
                     * pointer at the end, then pArgs->offset should not change
                     * here;
                     * however, some converters do not handle offsets at all
                     * (sourceIndex<0) or may not update the offsets pointer
                     */
                    pArgs->offsets=offsets+=length;
                }

                if(sourceIndex>=0) {
                    sourceIndex+=(int32_t)(pArgs->source-s);
                }
            }

            if(cnv->preToULength<0) {
                /*
                 * switch the source to new replay units (cannot occur while replaying)
                 * after offset handling and before end-of-input and callback handling
                 */
                if(realSource==NULL) {
                    realSource=pArgs->source;
                    realSourceLimit=pArgs->sourceLimit;
                    realFlush=pArgs->flush;
                    realSourceIndex=sourceIndex;

                    uprv_memcpy(replay, cnv->preToU, -cnv->preToULength);
                    pArgs->source=replay;
                    pArgs->sourceLimit=replay-cnv->preToULength;
                    pArgs->flush=FALSE;
                    /* preToULength is negative: this moves sourceIndex back by the replay length */
                    if((sourceIndex+=cnv->preToULength)<0) {
                        sourceIndex=-1;
                    }

                    cnv->preToULength=0;
                } else {
                    /* see implementation note before _fromUnicodeWithCallback() */
                    U_ASSERT(realSource==NULL);
                    *err=U_INTERNAL_PROGRAM_ERROR;
                }
            }

            /* update pointers */
            s=pArgs->source;
            t=pArgs->target;

            if(U_SUCCESS(*err)) {
                if(s<pArgs->sourceLimit) {
                    /*
                     * continue with the conversion loop while there is still input left
                     * (continue converting by breaking out of only the inner loop)
                     */
                    break;
                } else if(realSource!=NULL) {
                    /* switch back from replaying to the real source and continue */
                    pArgs->source=realSource;
                    pArgs->sourceLimit=realSourceLimit;
                    pArgs->flush=realFlush;
                    sourceIndex=realSourceIndex;

                    realSource=NULL;
                    break;
                } else if(pArgs->flush && cnv->toULength>0) {
                    /*
                     * the entire input stream is consumed
                     * and there is a partial, truncated input sequence left
                     */

                    /* inject an error and continue with callback handling */
                    *err=U_TRUNCATED_CHAR_FOUND;
                    calledCallback=FALSE; /* new error condition */
                } else {
                    /* input consumed */
                    if(pArgs->flush) {
                        /*
                         * return to the conversion loop once more if the flush
                         * flag is set and the conversion function has not
                         * successfully processed the end of the input yet
                         *
                         * (continue converting by breaking out of only the inner loop)
                         */
                        if(!converterSawEndOfInput) {
                            break;
                        }

                        /* reset the converter without calling the callback function */
                        _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE);
                    }

                    /* done successfully */
                    return;
                }
            }

            /* U_FAILURE(*err) */
            {
                UErrorCode e;

                if( calledCallback ||
                    (e=*err)==U_BUFFER_OVERFLOW_ERROR ||
                    (e!=U_INVALID_CHAR_FOUND &&
                     e!=U_ILLEGAL_CHAR_FOUND &&
                     e!=U_TRUNCATED_CHAR_FOUND &&
                     e!=U_ILLEGAL_ESCAPE_SEQUENCE &&
                     e!=U_UNSUPPORTED_ESCAPE_SEQUENCE)
                ) {
                    /*
                     * the callback did not or cannot resolve the error:
                     * set output pointers and return
                     *
                     * the check for buffer overflow is redundant but it is
                     * a high-runner case and hopefully documents the intent
                     * well
                     *
                     * if we were replaying, then the replay buffer must be
                     * copied back into the UConverter
                     * and the real arguments must be restored
                     */
                    if(realSource!=NULL) {
                        int32_t length;

                        U_ASSERT(cnv->preToULength==0);

                        length=(int32_t)(pArgs->sourceLimit-pArgs->source);
                        if(length>0) {
                            uprv_memcpy(cnv->preToU, pArgs->source, length);
                            cnv->preToULength=(int8_t)-length;
                        }

                        pArgs->source=realSource;
                        pArgs->sourceLimit=realSourceLimit;
                        pArgs->flush=realFlush;
                    }

                    return;
                }
            }

            /* copy toUBytes[] to invalidCharBuffer[] */
            errorInputLength=cnv->invalidCharLength=cnv->toULength;
            if(errorInputLength>0) {
                uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength);
            }

            /* set the converter state to deal with the next character */
            cnv->toULength=0;

            /* call the callback function */
            if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) {
                cnv->toUCallbackReason = UCNV_UNASSIGNED;
            }
            cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs,
                cnv->invalidCharBuffer, errorInputLength,
                cnv->toUCallbackReason,
                err);
            cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */

            /*
             * loop back to the offset handling
             *
             * this flag will indicate after offset handling
             * that a callback was called;
             * if the callback did not resolve the error, then we return
             */
            calledCallback=TRUE;
        }
    }
}
uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); + } + + /* set the converter state to deal with the next character */ + cnv->toULength=0; + + /* call the callback function */ + if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { + cnv->toUCallbackReason = UCNV_UNASSIGNED; + } + cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, + cnv->invalidCharBuffer, errorInputLength, + cnv->toUCallbackReason, + err); + cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ + + /* + * loop back to the offset handling + * + * this flag will indicate after offset handling + * that a callback was called; + * if the callback did not resolve the error, then we return + */ + calledCallback=TRUE; + } + } +} + +/* + * Output the toUnicode overflow buffer. + * Call this function if(cnv->UCharErrorBufferLength>0). + * @return TRUE if overflow + */ +static UBool +ucnv_outputOverflowToUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + int32_t **pOffsets, + UErrorCode *err) { + int32_t *offsets; + UChar *overflow, *t; + int32_t i, length; + + t=*target; + if(pOffsets!=NULL) { + offsets=*pOffsets; + } else { + offsets=NULL; + } + + overflow=cnv->UCharErrorBuffer; + length=cnv->UCharErrorBufferLength; + i=0; + while(i<length) { + if(t==targetLimit) { + /* the overflow buffer contains too much, keep the rest */ + int32_t j=0; + + do { + overflow[j++]=overflow[i++]; + } while(i<length); + + cnv->UCharErrorBufferLength=(int8_t)j; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + *err=U_BUFFER_OVERFLOW_ERROR; + return TRUE; + } + + /* copy the overflow contents to the target */ + *t++=overflow[i++]; + if(offsets!=NULL) { + *offsets++=-1; /* no source index available for old output */ + } + } + + /* the overflow buffer is completely copied to the target */ + cnv->UCharErrorBufferLength=0; + *target=t; + if(offsets!=NULL) { + *pOffsets=offsets; + } + return FALSE; +} + +U_CAPI void U_EXPORT2 
+ucnv_toUnicode(UConverter *cnv, + UChar **target, const UChar *targetLimit, + const char **source, const char *sourceLimit, + int32_t *offsets, + UBool flush, + UErrorCode *err) { + UConverterToUnicodeArgs args; + const char *s; + UChar *t; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return; + } + + if(cnv==NULL || target==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + + if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { + /* + Prevent code from going into an infinite loop in case we do hit this + limit. The limit pointer is expected to be on a UChar * boundary. + This also prevents the next argument check from failing. + */ + targetLimit = (const UChar *)(((const char *)targetLimit) - 1); + } + + /* + * All these conditions should never happen. + * + * 1) Make sure that the limits are >= to the address source or target + * + * 2) Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. + * + * 3) Make sure that the user didn't incorrectly cast a UChar * pointer + * to a char * pointer and provide an incomplete UChar code unit. 
+ */ + if (sourceLimit<s || targetLimit<t || + ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || + ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || + (((const char *)targetLimit-(const char *)t) & 1) != 0 + ) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* output the target overflow buffer */ + if( cnv->UCharErrorBufferLength>0 && + ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) + ) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target may have moved, therefore stop using t */ + + if(!flush && s==sourceLimit && cnv->preToULength>=0) { + /* the overflow buffer is emptied and there is no new input: we are done */ + return; + } + + /* + * Do not simply return with a buffer overflow error if + * !flush && t==targetLimit + * because it is possible that the source will not generate any output. + * For example, the skip callback may be called; + * it does not output anything. + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=flush; + args.offsets=offsets; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=*target; + args.targetLimit=targetLimit; + args.size=sizeof(args); + + _toUnicodeWithCallback(&args, err); + + *source=args.source; + *target=args.target; +} + +/* ucnv_to/fromUChars() ----------------------------------------------------- */ + +U_CAPI int32_t U_EXPORT2 +ucnv_fromUChars(UConverter *cnv, + char *dest, int32_t destCapacity, + const UChar *src, int32_t srcLength, + UErrorCode *pErrorCode) { + const UChar *srcLimit; + char *originalDest, *destLimit; + int32_t destLength; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || + destCapacity<0 || (destCapacity>0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetFromUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + 
srcLength=u_strlen(src); + } + if(srcLength>0) { + srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); + destLimit=dest+destCapacity; + + /* perform the conversion */ + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength=(int32_t)(dest-originalDest); + + /* if an overflow occurs, then get the preflighting length */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + char buffer[1024]; + + destLimit=buffer+sizeof(buffer); + do { + dest=buffer; + *pErrorCode=U_ZERO_ERROR; + ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength+=(int32_t)(dest-buffer); + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + } + } else { + destLength=0; + } + + return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toUChars(UConverter *cnv, + UChar *dest, int32_t destCapacity, + const char *src, int32_t srcLength, + UErrorCode *pErrorCode) { + const char *srcLimit; + UChar *originalDest, *destLimit; + int32_t destLength; + + /* check arguments */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || + destCapacity<0 || (destCapacity>0 && dest==NULL) || + srcLength<-1 || (srcLength!=0 && src==NULL)) + { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* initialize */ + ucnv_resetToUnicode(cnv); + originalDest=dest; + if(srcLength==-1) { + srcLength=(int32_t)uprv_strlen(src); + } + if(srcLength>0) { + srcLimit=src+srcLength; + destCapacity=pinCapacity(dest, destCapacity); + destLimit=dest+destCapacity; + + /* perform the conversion */ + ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength=(int32_t)(dest-originalDest); + + /* if an overflow occurs, then get the preflighting length */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) + { + UChar buffer[1024]; + + destLimit=buffer+UPRV_LENGTHOF(buffer); + do { + dest=buffer; + *pErrorCode=U_ZERO_ERROR; + ucnv_toUnicode(cnv, 
&dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); + destLength+=(int32_t)(dest-buffer); + } + while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + } + } else { + destLength=0; + } + + return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); +} + +/* ucnv_getNextUChar() ------------------------------------------------------ */ + +U_CAPI UChar32 U_EXPORT2 +ucnv_getNextUChar(UConverter *cnv, + const char **source, const char *sourceLimit, + UErrorCode *err) { + UConverterToUnicodeArgs args; + UChar buffer[U16_MAX_LENGTH]; + const char *s; + UChar32 c; + int32_t i, length; + + /* check parameters */ + if(err==NULL || U_FAILURE(*err)) { + return 0xffff; + } + + if(cnv==NULL || source==NULL) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + s=*source; + if(sourceLimit<s) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + /* + * Make sure that the buffer sizes do not exceed the number range for + * int32_t because some functions use the size (in units or bytes) + * rather than comparing pointers, and because offsets are int32_t values. + * + * size_t is guaranteed to be unsigned and large enough for the job. + * + * Return with an error instead of adjusting the limits because we would + * not be able to maintain the semantics that either the source must be + * consumed or the target filled (unless an error occurs). + * An adjustment would be sourceLimit=t+0x7fffffff; for example. 
+ */ + if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { + *err=U_ILLEGAL_ARGUMENT_ERROR; + return 0xffff; + } + + c=U_SENTINEL; + + /* flush the target overflow buffer */ + if(cnv->UCharErrorBufferLength>0) { + UChar *overflow; + + overflow=cnv->UCharErrorBuffer; + i=0; + length=cnv->UCharErrorBufferLength; + U16_NEXT(overflow, i, length, c); + + /* move the remaining overflow contents up to the beginning */ + if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + + if(!U16_IS_LEAD(c) || i<length) { + return c; + } + /* + * Continue if the overflow buffer contained only a lead surrogate, + * in case the converter outputs single surrogates from complete + * input sequences. + */ + } + + /* + * flush==TRUE is implied for ucnv_getNextUChar() + * + * do not simply return even if s==sourceLimit because the converter may + * not have seen flush==TRUE before + */ + + /* prepare the converter arguments */ + args.converter=cnv; + args.flush=TRUE; + args.offsets=NULL; + args.source=s; + args.sourceLimit=sourceLimit; + args.target=buffer; + args.targetLimit=buffer+1; + args.size=sizeof(args); + + if(c<0) { + /* + * call the native getNextUChar() implementation if we are + * at a character boundary (toULength==0) + * + * unlike with _toUnicode(), getNextUChar() implementations must set + * U_TRUNCATED_CHAR_FOUND for truncated input, + * in addition to setting toULength/toUBytes[] + */ + if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { + c=cnv->sharedData->impl->getNextUChar(&args, err); + *source=s=args.source; + if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { + /* reset the converter without calling the callback function */ + _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); + return 0xffff; /* no output */ + } else if(U_SUCCESS(*err) && c>=0) { + return c; + /* + * else fall through to use _toUnicode() because + * 
UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all + * U_FAILURE: call _toUnicode() for callback handling (do not output c) + */ + } + } + + /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ + _toUnicodeWithCallback(&args, err); + + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + i=0; + length=(int32_t)(args.target-buffer); + } else { + /* write the lead surrogate from the overflow buffer */ + buffer[0]=(UChar)c; + args.target=buffer+1; + i=0; + length=1; + } + + /* buffer contents starts at i and ends before length */ + + if(U_FAILURE(*err)) { + c=0xffff; /* no output */ + } else if(length==0) { + /* no input or only state changes */ + *err=U_INDEX_OUTOFBOUNDS_ERROR; + /* no need to reset explicitly because _toUnicodeWithCallback() did it */ + c=0xffff; /* no output */ + } else { + c=buffer[0]; + i=1; + if(!U16_IS_LEAD(c)) { + /* consume c=buffer[0], done */ + } else { + /* got a lead surrogate, see if a trail surrogate follows */ + UChar c2; + + if(cnv->UCharErrorBufferLength>0) { + /* got overflow output from the conversion */ + if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + + /* move the remaining overflow contents up to the beginning */ + if((--cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, + cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); + } + } else { + /* c is an unpaired lead surrogate, just return it */ + } + } else if(args.source<sourceLimit) { + /* convert once more, to buffer[1] */ + args.targetLimit=buffer+2; + _toUnicodeWithCallback(&args, err); + if(*err==U_BUFFER_OVERFLOW_ERROR) { + *err=U_ZERO_ERROR; + } + + length=(int32_t)(args.target-buffer); + if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { + /* got a trail surrogate, too */ + c=U16_GET_SUPPLEMENTARY(c, c2); + i=2; + } + } + } + } + + /* + * move leftover output from buffer[i..length[ 
+ * into the beginning of the overflow buffer + */ + if(i<length) { + /* move further overflow back */ + int32_t delta=length-i; + if((length=cnv->UCharErrorBufferLength)>0) { + uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, + length*U_SIZEOF_UCHAR); + } + cnv->UCharErrorBufferLength=(int8_t)(length+delta); + + cnv->UCharErrorBuffer[0]=buffer[i++]; + if(delta>1) { + cnv->UCharErrorBuffer[1]=buffer[i]; + } + } + + *source=args.source; + return c; +} + +/* ucnv_convert() and siblings ---------------------------------------------- */ + +U_CAPI void U_EXPORT2 +ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, + char **target, const char *targetLimit, + const char **source, const char *sourceLimit, + UChar *pivotStart, UChar **pivotSource, + UChar **pivotTarget, const UChar *pivotLimit, + UBool reset, UBool flush, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + const UChar *myPivotSource; + UChar *myPivotTarget; + const char *s; + char *t; + + UConverterToUnicodeArgs toUArgs; + UConverterFromUnicodeArgs fromUArgs; + UConverterConvert convert; + + /* error checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return; + } + + if( targetCnv==NULL || sourceCnv==NULL || + source==NULL || *source==NULL || + target==NULL || *target==NULL || targetLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + s=*source; + t=*target; + if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* + * Make sure that the buffer sizes do not exceed the number range for + * int32_t. See ucnv_toUnicode() for a more detailed comment. 
+ */ + if( + (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || + ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(pivotStart==NULL) { + if(!flush) { + /* streaming conversion requires an explicit pivot buffer */ + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + /* use the stack pivot buffer */ + myPivotSource=myPivotTarget=pivotStart=pivotBuffer; + pivotSource=(UChar **)&myPivotSource; + pivotTarget=&myPivotTarget; + pivotLimit=pivotBuffer+CHUNK_SIZE; + } else if( pivotStart>=pivotLimit || + pivotSource==NULL || *pivotSource==NULL || + pivotTarget==NULL || *pivotTarget==NULL || + pivotLimit==NULL + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + if(sourceLimit==NULL) { + /* get limit of single-byte-NUL-terminated source string */ + sourceLimit=uprv_strchr(*source, 0); + } + + if(reset) { + ucnv_resetToUnicode(sourceCnv); + ucnv_resetFromUnicode(targetCnv); + *pivotSource=*pivotTarget=pivotStart; + } else if(targetCnv->charErrorBufferLength>0) { + /* output the targetCnv overflow buffer */ + if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + return; + } + /* *target has moved, therefore stop using t */ + + if( !flush && + targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && + sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit + ) { + /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ + return; + } + } + + /* Is direct-UTF-8 conversion available? 
*/ + if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + targetCnv->sharedData->impl->fromUTF8!=NULL + ) { + convert=targetCnv->sharedData->impl->fromUTF8; + } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && + sourceCnv->sharedData->impl->toUTF8!=NULL + ) { + convert=sourceCnv->sharedData->impl->toUTF8; + } else { + convert=NULL; + } + + /* + * If direct-UTF-8 conversion is available, then we use a smaller + * pivot buffer for error handling and partial matches + * so that we quickly return to direct conversion. + * + * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. + * + * We could reduce the pivot buffer size further, at the cost of + * buffer overflows from callbacks. + * The pivot buffer should not be smaller than the maximum number of + * fromUnicode extension table input UChars + * (for m:n conversion, see + * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) + * or 2 for surrogate pairs. + * + * Too small a buffer can cause thrashing between pivoting and direct + * conversion, with function call overhead outweighing the benefits + * of direct conversion. + */ + if(convert!=NULL && (pivotLimit-pivotStart)>32) { + pivotLimit=pivotStart+32; + } + + /* prepare the converter arguments */ + fromUArgs.converter=targetCnv; + fromUArgs.flush=FALSE; + fromUArgs.offsets=NULL; + fromUArgs.target=*target; + fromUArgs.targetLimit=targetLimit; + fromUArgs.size=sizeof(fromUArgs); + + toUArgs.converter=sourceCnv; + toUArgs.flush=flush; + toUArgs.offsets=NULL; + toUArgs.source=s; + toUArgs.sourceLimit=sourceLimit; + toUArgs.targetLimit=pivotLimit; + toUArgs.size=sizeof(toUArgs); + + /* + * TODO: Consider separating this function into two functions, + * extracting exactly the conversion loop, + * for readability and to reduce the set of visible variables. + * + * Otherwise stop using s and t from here on. 
+ */ + s=t=NULL; + + /* + * conversion loop + * + * The sequence of steps in the loop may appear backward, + * but the principle is simple: + * In the chain of + * source - sourceCnv overflow - pivot - targetCnv overflow - target + * empty out later buffers before refilling them from earlier ones. + * + * The targetCnv overflow buffer is flushed out only once before the loop. + */ + for(;;) { + /* + * if(pivot not empty or error or replay or flush fromUnicode) { + * fromUnicode(pivot -> target); + * } + * + * For pivoting conversion; and for direct conversion for + * error callback handling and flushing the replay buffer. + */ + if( *pivotSource<*pivotTarget || + U_FAILURE(*pErrorCode) || + targetCnv->preFromULength<0 || + fromUArgs.flush + ) { + fromUArgs.source=*pivotSource; + fromUArgs.sourceLimit=*pivotTarget; + _fromUnicodeWithCallback(&fromUArgs, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + /* target overflow, or conversion error */ + *pivotSource=(UChar *)fromUArgs.source; + break; + } + + /* + * _fromUnicodeWithCallback() must have consumed the pivot contents + * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() + */ + } + + /* The pivot buffer is empty; reset it so we start at pivotStart. */ + *pivotSource=*pivotTarget=pivotStart; + + /* + * if(sourceCnv overflow buffer not empty) { + * move(sourceCnv overflow buffer -> pivot); + * continue; + * } + */ + /* output the sourceCnv overflow buffer */ + if(sourceCnv->UCharErrorBufferLength>0) { + if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { + /* U_BUFFER_OVERFLOW_ERROR */ + *pErrorCode=U_ZERO_ERROR; + } + continue; + } + + /* + * check for end of input and break if done + * + * Checking both flush and fromUArgs.flush ensures that the converters + * have been called with the flush flag set if the ucnv_convertEx() + * caller set it. 
+ */ + if( toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && + (!flush || fromUArgs.flush) + ) { + /* done successfully */ + break; + } + + /* + * use direct conversion if available + * but not if continuing a partial match + * or flushing the toUnicode replay buffer + */ + if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { + if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* remove a warning that may be set by this function */ + *pErrorCode=U_ZERO_ERROR; + } + convert(&fromUArgs, &toUArgs, pErrorCode); + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + break; + } else if(U_FAILURE(*pErrorCode)) { + if(sourceCnv->toULength>0) { + /* + * Fall through to calling _toUnicodeWithCallback() + * for callback handling. + * + * The pivot buffer will be reset with + * *pivotSource=*pivotTarget=pivotStart; + * which indicates a toUnicode error to the caller + * (*pivotSource==pivotStart shows no pivot UChars consumed). + */ + } else { + /* + * Indicate a fromUnicode error to the caller + * (*pivotSource>pivotStart shows some pivot UChars consumed). + */ + *pivotSource=*pivotTarget=pivotStart+1; + /* + * Loop around to calling _fromUnicodeWithCallbacks() + * for callback handling. + */ + continue; + } + } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { + /* + * No error, but the implementation requested to temporarily + * fall back to pivoting. + */ + *pErrorCode=U_ZERO_ERROR; + /* + * The following else branches are almost identical to the end-of-input + * handling in _toUnicodeWithCallback(). + * Avoid calling it just for the end of input. 
+ */ + } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ + /* + * the entire input stream is consumed + * and there is a partial, truncated input sequence left + */ + + /* inject an error and continue with callback handling */ + *pErrorCode=U_TRUNCATED_CHAR_FOUND; + } else { + /* input consumed */ + if(flush) { + /* reset the converters without calling the callback functions */ + _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); + _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); + } + + /* done successfully */ + break; + } + } + + /* + * toUnicode(source -> pivot); + * + * For pivoting conversion; and for direct conversion for + * error callback handling, continuing partial matches + * and flushing the replay buffer. + * + * The pivot buffer is empty and reset. + */ + toUArgs.target=pivotStart; /* ==*pivotTarget */ + /* toUArgs.targetLimit=pivotLimit; already set before the loop */ + _toUnicodeWithCallback(&toUArgs, pErrorCode); + *pivotTarget=toUArgs.target; + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { + /* pivot overflow: continue with the conversion loop */ + *pErrorCode=U_ZERO_ERROR; + } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { + /* conversion error, or there was nothing left to convert */ + break; + } + /* + * else: + * _toUnicodeWithCallback() wrote into the pivot buffer, + * continue with fromUnicode conversion. + * + * Set the fromUnicode flush flag if we flush and if toUnicode has + * processed the end of the input. 
+ */ + if( flush && toUArgs.source==sourceLimit && + sourceCnv->preToULength>=0 && + sourceCnv->UCharErrorBufferLength==0 + ) { + fromUArgs.flush=TRUE; + } + } + + /* + * The conversion loop is exited when one of the following is true: + * - the entire source text has been converted successfully to the target buffer + * - a target buffer overflow occurred + * - a conversion error occurred + */ + + *source=toUArgs.source; + *target=fromUArgs.target; + + /* terminate the target buffer if possible */ + if(flush && U_SUCCESS(*pErrorCode)) { + if(*target!=targetLimit) { + **target=0; + if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { + *pErrorCode=U_ZERO_ERROR; + } + } else { + *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; + } + } +} + +/* internal implementation of ucnv_convert() etc. with preflighting */ +static int32_t +ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UChar pivotBuffer[CHUNK_SIZE]; + UChar *pivot, *pivot2; + + char *myTarget; + const char *sourceLimit; + const char *targetLimit; + int32_t targetLength=0; + + /* set up */ + if(sourceLength<0) { + sourceLimit=uprv_strchr(source, 0); + } else { + sourceLimit=source+sourceLength; + } + + /* if there is no input data, we're done */ + if(source==sourceLimit) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + pivot=pivot2=pivotBuffer; + myTarget=target; + targetLength=0; + + if(targetCapacity>0) { + /* perform real conversion */ + targetLimit=target+targetCapacity; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength=(int32_t)(myTarget-target); + } + + /* + * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing + * to it but continue the conversion in order to store in 
targetCapacity + * the number of bytes that was required. + */ + if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) + { + char targetBuffer[CHUNK_SIZE]; + + targetLimit=targetBuffer+CHUNK_SIZE; + do { + *pErrorCode=U_ZERO_ERROR; + myTarget=targetBuffer; + ucnv_convertEx(outConverter, inConverter, + &myTarget, targetLimit, + &source, sourceLimit, + pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, + FALSE, + TRUE, + pErrorCode); + targetLength+=(int32_t)(myTarget-targetBuffer); + } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); + + /* done with preflighting, set warnings and errors as appropriate */ + return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); + } + + /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_convert(const char *toConverterName, const char *fromConverterName, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter in, out; /* stack-allocated */ + UConverter *inConverter, *outConverter; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the converters */ + inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + ucnv_close(inConverter); + return 0; + } + + targetLength=ucnv_internalConvert(outConverter, inConverter, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + 
ucnv_close(inConverter); + ucnv_close(outConverter); + + return targetLength; +} + +/* @internal */ +static int32_t +ucnv_convertAlgorithmic(UBool convertToAlgorithmic, + UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + UConverter algoConverterStatic; /* stack-allocated */ + UConverter *algoConverter, *to, *from; + int32_t targetLength; + + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if( cnv==NULL || source==NULL || sourceLength<-1 || + targetCapacity<0 || (targetCapacity>0 && target==NULL) + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + /* if there is no input data, we're done */ + if(sourceLength==0 || (sourceLength<0 && *source==0)) { + return u_terminateChars(target, targetCapacity, 0, pErrorCode); + } + + /* create the algorithmic converter */ + algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, + "", 0, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* reset the other converter */ + if(convertToAlgorithmic) { + /* cnv->Unicode->algo */ + ucnv_resetToUnicode(cnv); + to=algoConverter; + from=cnv; + } else { + /* algo->Unicode->cnv */ + ucnv_resetFromUnicode(cnv); + from=algoConverter; + to=cnv; + } + + targetLength=ucnv_internalConvert(to, from, + target, targetCapacity, + source, sourceLength, + pErrorCode); + + ucnv_close(algoConverter); + + return targetLength; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toAlgorithmic(UConverterType algorithmicType, + UConverter *cnv, + char *target, int32_t targetCapacity, + const char *source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI int32_t U_EXPORT2 +ucnv_fromAlgorithmic(UConverter *cnv, + UConverterType algorithmicType, + char *target, int32_t targetCapacity, + const char 
*source, int32_t sourceLength, + UErrorCode *pErrorCode) { + return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, + target, targetCapacity, + source, sourceLength, + pErrorCode); +} + +U_CAPI UConverterType U_EXPORT2 +ucnv_getType(const UConverter* converter) +{ + int8_t type = converter->sharedData->staticData->conversionType; +#if !UCONFIG_NO_LEGACY_CONVERSION + if(type == UCNV_MBCS) { + return ucnv_MBCSGetType(converter); + } +#endif + return (UConverterType)type; +} + +U_CAPI void U_EXPORT2 +ucnv_getStarters(const UConverter* converter, + UBool starters[256], + UErrorCode* err) +{ + if (err == NULL || U_FAILURE(*err)) { + return; + } + + if(converter->sharedData->impl->getStarters != NULL) { + converter->sharedData->impl->getStarters(converter, starters, err); + } else { + *err = U_ILLEGAL_ARGUMENT_ERROR; + } +} + +static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) +{ + UErrorCode errorCode; + const char *name; + int32_t i; + + if(cnv==NULL) { + return NULL; + } + + errorCode=U_ZERO_ERROR; + name=ucnv_getName(cnv, &errorCode); + if(U_FAILURE(errorCode)) { + return NULL; + } + + for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i) + { + if(0==uprv_strcmp(name, ambiguousConverters[i].name)) + { + return ambiguousConverters+i; + } + } + + return NULL; +} + +U_CAPI void U_EXPORT2 +ucnv_fixFileSeparator(const UConverter *cnv, + UChar* source, + int32_t sourceLength) { + const UAmbiguousConverter *a; + int32_t i; + UChar variant5c; + + if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) + { + return; + } + + variant5c=a->variant5c; + for(i=0; i<sourceLength; ++i) { + if(source[i]==variant5c) { + source[i]=0x5c; + } + } +} + +U_CAPI UBool U_EXPORT2 +ucnv_isAmbiguous(const UConverter *cnv) { + return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); +} + +U_CAPI void U_EXPORT2 +ucnv_setFallback(UConverter *cnv, UBool usesFallback) +{ + cnv->useFallback = usesFallback; +} + +U_CAPI UBool U_EXPORT2 
+ucnv_usesFallback(const UConverter *cnv) +{ + return cnv->useFallback; +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidChars (const UConverter * converter, + char *errBytes, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + return; + } + if (len == NULL || errBytes == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidCharLength) > 0) + { + uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); + } +} + +U_CAPI void U_EXPORT2 +ucnv_getInvalidUChars (const UConverter * converter, + UChar *errChars, + int8_t * len, + UErrorCode * err) +{ + if (err == NULL || U_FAILURE(*err)) + { + return; + } + if (len == NULL || errChars == NULL || converter == NULL) + { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + if (*len < converter->invalidUCharLength) + { + *err = U_INDEX_OUTOFBOUNDS_ERROR; + return; + } + if ((*len = converter->invalidUCharLength) > 0) + { + u_memcpy (errChars, converter->invalidUCharBuffer, *len); + } +} + +#define SIG_MAX_LEN 5 + +U_CAPI const char* U_EXPORT2 +ucnv_detectUnicodeSignature( const char* source, + int32_t sourceLength, + int32_t* signatureLength, + UErrorCode* pErrorCode) { + int32_t dummy; + + /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN + * bytes we don't misdetect something + */ + char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; + int i = 0; + + if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ + return NULL; + } + + if(source == NULL || sourceLength < -1){ + *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + + if(signatureLength == NULL) { + signatureLength = &dummy; + } + + if(sourceLength==-1){ + sourceLength=(int32_t)uprv_strlen(source); + } + -{ - if (U_FAILURE (*err)) - return NULL; - if(converter->sharedData->impl->getName){ - const char* temp= converter->sharedData->impl->getName(converter); - 
if(temp) - return temp; - } - return converter->sharedData->staticData->name; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_getCCSID(const UConverter * converter, - UErrorCode * err) -{ - int32_t ccsid; - if (U_FAILURE (*err)) - return -1; - - ccsid = converter->sharedData->staticData->codepage; - if (ccsid == 0) { - /* Rare case. This is for cases like gb18030, - which doesn't have an IBM canonical name, but does have an IBM alias. */ - const char *standardName = ucnv_getStandardName(ucnv_getName(converter, err), "IBM", err); - if (U_SUCCESS(*err) && standardName) { - const char *ccsidStr = uprv_strchr(standardName, '-'); - if (ccsidStr) { - ccsid = (int32_t)atol(ccsidStr+1); /* +1 to skip '-' */ - } - } - } - return ccsid; -} - - -U_CAPI UConverterPlatform U_EXPORT2 -ucnv_getPlatform (const UConverter * converter, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return UCNV_UNKNOWN; - - return (UConverterPlatform)converter->sharedData->staticData->platform; -} - -U_CAPI void U_EXPORT2 - ucnv_getToUCallBack (const UConverter * converter, - UConverterToUCallback *action, - const void **context) -{ - *action = converter->fromCharErrorBehaviour; - *context = converter->toUContext; -} - -U_CAPI void U_EXPORT2 - ucnv_getFromUCallBack (const UConverter * converter, - UConverterFromUCallback *action, - const void **context) -{ - *action = converter->fromUCharErrorBehaviour; - *context = converter->fromUContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setToUCallBack (UConverter * converter, - UConverterToUCallback newAction, - const void* newContext, - UConverterToUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromCharErrorBehaviour; - converter->fromCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->toUContext; - converter->toUContext = newContext; -} - -U_CAPI void U_EXPORT2 -ucnv_setFromUCallBack (UConverter * converter, - UConverterFromUCallback newAction, - 
const void* newContext, - UConverterFromUCallback *oldAction, - const void** oldContext, - UErrorCode * err) -{ - if (U_FAILURE (*err)) - return; - if (oldAction) *oldAction = converter->fromUCharErrorBehaviour; - converter->fromUCharErrorBehaviour = newAction; - if (oldContext) *oldContext = converter->fromUContext; - converter->fromUContext = newContext; -} - -static void -_updateOffsets(int32_t *offsets, int32_t length, - int32_t sourceIndex, int32_t errorInputLength) { - int32_t *limit; - int32_t delta, offset; - - if(sourceIndex>=0) { - /* - * adjust each offset by adding the previous sourceIndex - * minus the length of the input sequence that caused an - * error, if any - */ - delta=sourceIndex-errorInputLength; - } else { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - */ - delta=-1; - } - - limit=offsets+length; - if(delta==0) { - /* most common case, nothing to do */ - } else if(delta>0) { - /* add the delta to each offset (but not if the offset is <0) */ - while(offsets<limit) { - offset=*offsets; - if(offset>=0) { - *offsets=offset+delta; - } - ++offsets; - } - } else /* delta<0 */ { - /* - * set each offset to -1 because this conversion function - * does not handle offsets - * or the error input sequence started in a previous buffer - */ - while(offsets<limit) { - *offsets++=-1; - } - } -} - -/* ucnv_fromUnicode --------------------------------------------------------- */ - -/* - * Implementation note for m:n conversions - * - * While collecting source units to find the longest match for m:n conversion, - * some source units may need to be stored for a partial match. - * When a second buffer does not yield a match on all of the previously stored - * source units, then they must be "replayed", i.e., fed back into the converter. - * - * The code relies on the fact that replaying will not nest - - * converting a replay buffer will not result in a replay. 
- * This is because a replay is necessary only after the _continuation_ of a - * partial match failed, but a replay buffer is converted as a whole. - * It may result in some of its units being stored again for a partial match, - * but there will not be a continuation _during_ the replay which could fail. - * - * It is conceivable that a callback function could call the converter - * recursively in a way that causes another replay to be stored, but that - * would be an error in the callback function. - * Such violations will cause assertion failures in a debug build, - * and wrong output, but they will not cause a crash. - */ - -static void -_fromUnicodeWithCallback(UConverterFromUnicodeArgs *pArgs, UErrorCode *err) { - UConverterFromUnicode fromUnicode; - UConverter *cnv; - const UChar *s; - char *t; - int32_t *offsets; - int32_t sourceIndex; - int32_t errorInputLength; - UBool converterSawEndOfInput, calledCallback; - - /* variables for m:n conversion */ - UChar replay[UCNV_EXT_MAX_UCHARS]; - const UChar *realSource, *realSourceLimit; - int32_t realSourceIndex; - UBool realFlush; - - cnv=pArgs->converter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - if(offsets==NULL) { - fromUnicode=cnv->sharedData->impl->fromUnicode; - } else { - fromUnicode=cnv->sharedData->impl->fromUnicodeWithOffsets; - if(fromUnicode==NULL) { - /* there is no WithOffsets implementation */ - fromUnicode=cnv->sharedData->impl->fromUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preFromULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. 
- * We need to "replay" them from a temporary buffer and convert them first. - */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preFromULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - fromUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preFromULength==0 because a replay (<0) will cause - * s<sourceLimit before converterSawEndOfInput is checked - */ - converterSawEndOfInput= - (UBool)(U_SUCCESS(*err) && - pArgs->flush && pArgs->source==pArgs->sourceLimit && - cnv->fromUChar32==0); - } else { - /* handle error from ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. 
after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preFromULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preFromU, -cnv->preFromULength*U_SIZEOF_UCHAR); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preFromULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preFromULength)<0) { - sourceIndex=-1; - } - - cnv->preFromULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(s<pArgs->sourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - realSource=NULL; - break; - } else if(pArgs->flush && cnv->fromUChar32!=0) { - /* - * the entire input stream is 
consumed - * and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preFromULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - u_memcpy(cnv->preFromU, pArgs->source, length); - cnv->preFromULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* callback handling */ - { - UChar32 codePoint; - - /* get and write the code point */ - codePoint=cnv->fromUChar32; - errorInputLength=0; - U16_APPEND_UNSAFE(cnv->invalidUCharBuffer, errorInputLength, codePoint); - cnv->invalidUCharLength=(int8_t)errorInputLength; - - /* set the converter state to deal with the next character */ - 
cnv->fromUChar32=0; - - /* call the callback function */ - cnv->fromUCharErrorBehaviour(cnv->fromUContext, pArgs, - cnv->invalidUCharBuffer, errorInputLength, codePoint, - *err==U_INVALID_CHAR_FOUND ? UCNV_UNASSIGNED : UCNV_ILLEGAL, - err); - } - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the fromUnicode overflow buffer. - * Call this function if(cnv->charErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowFromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - char *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=(char *)cnv->charErrorBuffer; - length=cnv->charErrorBufferLength; - i=0; - while(i<length) { - if(t==targetLimit) { - /* the overflow buffer contains too much, keep the rest */ - int32_t j=0; - - do { - overflow[j++]=overflow[i++]; - } while(i<length); - - cnv->charErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->charErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 -ucnv_fromUnicode(UConverter *cnv, - char **target, const char *targetLimit, - const UChar **source, const UChar *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterFromUnicodeArgs args; - const UChar *s; - char *t; - - /* check parameters */ - if(err==NULL || 
U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void *)U_MAX_PTR(sourceLimit) == (const void *)sourceLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - sourceLimit = (const UChar *)(((const char *)sourceLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be targetLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit<s || targetLimit<t || - ((size_t)(sourceLimit-s)>(size_t)0x3fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) || - (((const char *)sourceLimit-(const char *)s) & 1) != 0) - { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->charErrorBufferLength>0 && - ucnv_outputOverflowFromUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preFromULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _fromUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_toUnicode() --------------------------------------------------------- */ - -static void -_toUnicodeWithCallback(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { - UConverterToUnicode toUnicode; - UConverter *cnv; - const char *s; - UChar *t; - int32_t *offsets; - int32_t sourceIndex; - int32_t errorInputLength; - UBool converterSawEndOfInput, calledCallback; - - /* variables for m:n conversion */ - char replay[UCNV_EXT_MAX_BYTES]; - const char *realSource, *realSourceLimit; - int32_t realSourceIndex; - UBool realFlush; - - cnv=pArgs->converter; - s=pArgs->source; - t=pArgs->target; - offsets=pArgs->offsets; - - /* get the converter implementation function */ - sourceIndex=0; - 
if(offsets==NULL) { - toUnicode=cnv->sharedData->impl->toUnicode; - } else { - toUnicode=cnv->sharedData->impl->toUnicodeWithOffsets; - if(toUnicode==NULL) { - /* there is no WithOffsets implementation */ - toUnicode=cnv->sharedData->impl->toUnicode; - /* we will write -1 for each offset */ - sourceIndex=-1; - } - } - - if(cnv->preToULength>=0) { - /* normal mode */ - realSource=NULL; - - /* avoid compiler warnings - not otherwise necessary, and the values do not matter */ - realSourceLimit=NULL; - realFlush=FALSE; - realSourceIndex=0; - } else { - /* - * Previous m:n conversion stored source units from a partial match - * and failed to consume all of them. - * We need to "replay" them from a temporary buffer and convert them first. - */ - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - sourceIndex=-1; - - cnv->preToULength=0; - } - - /* - * loop for conversion and error handling - * - * loop { - * convert - * loop { - * update offsets - * handle end of input - * handle errors/call callback - * } - * } - */ - for(;;) { - if(U_SUCCESS(*err)) { - /* convert */ - toUnicode(pArgs, err); - - /* - * set a flag for whether the converter - * successfully processed the end of the input - * - * need not check cnv->preToULength==0 because a replay (<0) will cause - * s<sourceLimit before converterSawEndOfInput is checked - */ - converterSawEndOfInput= - (UBool)(U_SUCCESS(*err) && - pArgs->flush && pArgs->source==pArgs->sourceLimit && - cnv->toULength==0); - } else { - /* handle error from getNextUChar() or ucnv_convertEx() */ - converterSawEndOfInput=FALSE; - } - - /* no callback called yet for this iteration */ - calledCallback=FALSE; - - /* no sourceIndex adjustment for conversion, only for callback output */ - errorInputLength=0; - - /* - * loop for 
offsets and error handling - * - * iterates at most 3 times: - * 1. to clean up after the conversion function - * 2. after the callback - * 3. after the callback again if there was truncated input - */ - for(;;) { - /* update offsets if we write any */ - if(offsets!=NULL) { - int32_t length=(int32_t)(pArgs->target-t); - if(length>0) { - _updateOffsets(offsets, length, sourceIndex, errorInputLength); - - /* - * if a converter handles offsets and updates the offsets - * pointer at the end, then pArgs->offset should not change - * here; - * however, some converters do not handle offsets at all - * (sourceIndex<0) or may not update the offsets pointer - */ - pArgs->offsets=offsets+=length; - } - - if(sourceIndex>=0) { - sourceIndex+=(int32_t)(pArgs->source-s); - } - } - - if(cnv->preToULength<0) { - /* - * switch the source to new replay units (cannot occur while replaying) - * after offset handling and before end-of-input and callback handling - */ - if(realSource==NULL) { - realSource=pArgs->source; - realSourceLimit=pArgs->sourceLimit; - realFlush=pArgs->flush; - realSourceIndex=sourceIndex; - - uprv_memcpy(replay, cnv->preToU, -cnv->preToULength); - pArgs->source=replay; - pArgs->sourceLimit=replay-cnv->preToULength; - pArgs->flush=FALSE; - if((sourceIndex+=cnv->preToULength)<0) { - sourceIndex=-1; - } - - cnv->preToULength=0; - } else { - /* see implementation note before _fromUnicodeWithCallback() */ - U_ASSERT(realSource==NULL); - *err=U_INTERNAL_PROGRAM_ERROR; - } - } - - /* update pointers */ - s=pArgs->source; - t=pArgs->target; - - if(U_SUCCESS(*err)) { - if(s<pArgs->sourceLimit) { - /* - * continue with the conversion loop while there is still input left - * (continue converting by breaking out of only the inner loop) - */ - break; - } else if(realSource!=NULL) { - /* switch back from replaying to the real source and continue */ - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - sourceIndex=realSourceIndex; - - 
realSource=NULL; - break; - } else if(pArgs->flush && cnv->toULength>0) { - /* - * the entire input stream is consumed - * and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *err=U_TRUNCATED_CHAR_FOUND; - calledCallback=FALSE; /* new error condition */ - } else { - /* input consumed */ - if(pArgs->flush) { - /* - * return to the conversion loop once more if the flush - * flag is set and the conversion function has not - * successfully processed the end of the input yet - * - * (continue converting by breaking out of only the inner loop) - */ - if(!converterSawEndOfInput) { - break; - } - - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - } - - /* done successfully */ - return; - } - } - - /* U_FAILURE(*err) */ - { - UErrorCode e; - - if( calledCallback || - (e=*err)==U_BUFFER_OVERFLOW_ERROR || - (e!=U_INVALID_CHAR_FOUND && - e!=U_ILLEGAL_CHAR_FOUND && - e!=U_TRUNCATED_CHAR_FOUND && - e!=U_ILLEGAL_ESCAPE_SEQUENCE && - e!=U_UNSUPPORTED_ESCAPE_SEQUENCE) - ) { - /* - * the callback did not or cannot resolve the error: - * set output pointers and return - * - * the check for buffer overflow is redundant but it is - * a high-runner case and hopefully documents the intent - * well - * - * if we were replaying, then the replay buffer must be - * copied back into the UConverter - * and the real arguments must be restored - */ - if(realSource!=NULL) { - int32_t length; - - U_ASSERT(cnv->preToULength==0); - - length=(int32_t)(pArgs->sourceLimit-pArgs->source); - if(length>0) { - uprv_memcpy(cnv->preToU, pArgs->source, length); - cnv->preToULength=(int8_t)-length; - } - - pArgs->source=realSource; - pArgs->sourceLimit=realSourceLimit; - pArgs->flush=realFlush; - } - - return; - } - } - - /* copy toUBytes[] to invalidCharBuffer[] */ - errorInputLength=cnv->invalidCharLength=cnv->toULength; - if(errorInputLength>0) { - 
uprv_memcpy(cnv->invalidCharBuffer, cnv->toUBytes, errorInputLength); - } - - /* set the converter state to deal with the next character */ - cnv->toULength=0; - - /* call the callback function */ - if(cnv->toUCallbackReason==UCNV_ILLEGAL && *err==U_INVALID_CHAR_FOUND) { - cnv->toUCallbackReason = UCNV_UNASSIGNED; - } - cnv->fromCharErrorBehaviour(cnv->toUContext, pArgs, - cnv->invalidCharBuffer, errorInputLength, - cnv->toUCallbackReason, - err); - cnv->toUCallbackReason = UCNV_ILLEGAL; /* reset to default value */ - - /* - * loop back to the offset handling - * - * this flag will indicate after offset handling - * that a callback was called; - * if the callback did not resolve the error, then we return - */ - calledCallback=TRUE; - } - } -} - -/* - * Output the toUnicode overflow buffer. - * Call this function if(cnv->UCharErrorBufferLength>0). - * @return TRUE if overflow - */ -static UBool -ucnv_outputOverflowToUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - int32_t **pOffsets, - UErrorCode *err) { - int32_t *offsets; - UChar *overflow, *t; - int32_t i, length; - - t=*target; - if(pOffsets!=NULL) { - offsets=*pOffsets; - } else { - offsets=NULL; - } - - overflow=cnv->UCharErrorBuffer; - length=cnv->UCharErrorBufferLength; - i=0; - while(i<length) { - if(t==targetLimit) { - /* the overflow buffer contains too much, keep the rest */ - int32_t j=0; - - do { - overflow[j++]=overflow[i++]; - } while(i<length); - - cnv->UCharErrorBufferLength=(int8_t)j; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - *err=U_BUFFER_OVERFLOW_ERROR; - return TRUE; - } - - /* copy the overflow contents to the target */ - *t++=overflow[i++]; - if(offsets!=NULL) { - *offsets++=-1; /* no source index available for old output */ - } - } - - /* the overflow buffer is completely copied to the target */ - cnv->UCharErrorBufferLength=0; - *target=t; - if(offsets!=NULL) { - *pOffsets=offsets; - } - return FALSE; -} - -U_CAPI void U_EXPORT2 
-ucnv_toUnicode(UConverter *cnv, - UChar **target, const UChar *targetLimit, - const char **source, const char *sourceLimit, - int32_t *offsets, - UBool flush, - UErrorCode *err) { - UConverterToUnicodeArgs args; - const char *s; - UChar *t; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return; - } - - if(cnv==NULL || target==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - - if ((const void *)U_MAX_PTR(targetLimit) == (const void *)targetLimit) { - /* - Prevent code from going into an infinite loop in case we do hit this - limit. The limit pointer is expected to be on a UChar * boundary. - This also prevents the next argument check from failing. - */ - targetLimit = (const UChar *)(((const char *)targetLimit) - 1); - } - - /* - * All these conditions should never happen. - * - * 1) Make sure that the limits are >= to the address source or target - * - * 2) Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be sourceLimit=t+0x7fffffff; for example. - * - * 3) Make sure that the user didn't incorrectly cast a UChar * pointer - * to a char * pointer and provide an incomplete UChar code unit. 
- */ - if (sourceLimit<s || targetLimit<t || - ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s) || - ((size_t)(targetLimit-t)>(size_t)0x3fffffff && targetLimit>t) || - (((const char *)targetLimit-(const char *)t) & 1) != 0 - ) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* output the target overflow buffer */ - if( cnv->UCharErrorBufferLength>0 && - ucnv_outputOverflowToUnicode(cnv, target, targetLimit, &offsets, err) - ) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target may have moved, therefore stop using t */ - - if(!flush && s==sourceLimit && cnv->preToULength>=0) { - /* the overflow buffer is emptied and there is no new input: we are done */ - return; - } - - /* - * Do not simply return with a buffer overflow error if - * !flush && t==targetLimit - * because it is possible that the source will not generate any output. - * For example, the skip callback may be called; - * it does not output anything. - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=flush; - args.offsets=offsets; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=*target; - args.targetLimit=targetLimit; - args.size=sizeof(args); - - _toUnicodeWithCallback(&args, err); - - *source=args.source; - *target=args.target; -} - -/* ucnv_to/fromUChars() ----------------------------------------------------- */ - -U_CAPI int32_t U_EXPORT2 -ucnv_fromUChars(UConverter *cnv, - char *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const UChar *srcLimit; - char *originalDest, *destLimit; - int32_t destLength; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || - destCapacity<0 || (destCapacity>0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetFromUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - 
srcLength=u_strlen(src); - } - if(srcLength>0) { - srcLimit=src+srcLength; - destCapacity=pinCapacity(dest, destCapacity); - destLimit=dest+destCapacity; - - /* perform the conversion */ - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength=(int32_t)(dest-originalDest); - - /* if an overflow occurs, then get the preflighting length */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - char buffer[1024]; - - destLimit=buffer+sizeof(buffer); - do { - dest=buffer; - *pErrorCode=U_ZERO_ERROR; - ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength+=(int32_t)(dest-buffer); - } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - } - } else { - destLength=0; - } - - return u_terminateChars(originalDest, destCapacity, destLength, pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toUChars(UConverter *cnv, - UChar *dest, int32_t destCapacity, - const char *src, int32_t srcLength, - UErrorCode *pErrorCode) { - const char *srcLimit; - UChar *originalDest, *destLimit; - int32_t destLength; - - /* check arguments */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || - destCapacity<0 || (destCapacity>0 && dest==NULL) || - srcLength<-1 || (srcLength!=0 && src==NULL)) - { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* initialize */ - ucnv_resetToUnicode(cnv); - originalDest=dest; - if(srcLength==-1) { - srcLength=(int32_t)uprv_strlen(src); - } - if(srcLength>0) { - srcLimit=src+srcLength; - destCapacity=pinCapacity(dest, destCapacity); - destLimit=dest+destCapacity; - - /* perform the conversion */ - ucnv_toUnicode(cnv, &dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength=(int32_t)(dest-originalDest); - - /* if an overflow occurs, then get the preflighting length */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) - { - UChar buffer[1024]; - - destLimit=buffer+UPRV_LENGTHOF(buffer); - do { - dest=buffer; - *pErrorCode=U_ZERO_ERROR; - ucnv_toUnicode(cnv, 
&dest, destLimit, &src, srcLimit, 0, TRUE, pErrorCode); - destLength+=(int32_t)(dest-buffer); - } - while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - } - } else { - destLength=0; - } - - return u_terminateUChars(originalDest, destCapacity, destLength, pErrorCode); -} - -/* ucnv_getNextUChar() ------------------------------------------------------ */ - -U_CAPI UChar32 U_EXPORT2 -ucnv_getNextUChar(UConverter *cnv, - const char **source, const char *sourceLimit, - UErrorCode *err) { - UConverterToUnicodeArgs args; - UChar buffer[U16_MAX_LENGTH]; - const char *s; - UChar32 c; - int32_t i, length; - - /* check parameters */ - if(err==NULL || U_FAILURE(*err)) { - return 0xffff; - } - - if(cnv==NULL || source==NULL) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - s=*source; - if(sourceLimit<s) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - /* - * Make sure that the buffer sizes do not exceed the number range for - * int32_t because some functions use the size (in units or bytes) - * rather than comparing pointers, and because offsets are int32_t values. - * - * size_t is guaranteed to be unsigned and large enough for the job. - * - * Return with an error instead of adjusting the limits because we would - * not be able to maintain the semantics that either the source must be - * consumed or the target filled (unless an error occurs). - * An adjustment would be sourceLimit=t+0x7fffffff; for example. 
- */ - if(((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) { - *err=U_ILLEGAL_ARGUMENT_ERROR; - return 0xffff; - } - - c=U_SENTINEL; - - /* flush the target overflow buffer */ - if(cnv->UCharErrorBufferLength>0) { - UChar *overflow; - - overflow=cnv->UCharErrorBuffer; - i=0; - length=cnv->UCharErrorBufferLength; - U16_NEXT(overflow, i, length, c); - - /* move the remaining overflow contents up to the beginning */ - if((cnv->UCharErrorBufferLength=(int8_t)(length-i))>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+i, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - - if(!U16_IS_LEAD(c) || i<length) { - return c; - } - /* - * Continue if the overflow buffer contained only a lead surrogate, - * in case the converter outputs single surrogates from complete - * input sequences. - */ - } - - /* - * flush==TRUE is implied for ucnv_getNextUChar() - * - * do not simply return even if s==sourceLimit because the converter may - * not have seen flush==TRUE before - */ - - /* prepare the converter arguments */ - args.converter=cnv; - args.flush=TRUE; - args.offsets=NULL; - args.source=s; - args.sourceLimit=sourceLimit; - args.target=buffer; - args.targetLimit=buffer+1; - args.size=sizeof(args); - - if(c<0) { - /* - * call the native getNextUChar() implementation if we are - * at a character boundary (toULength==0) - * - * unlike with _toUnicode(), getNextUChar() implementations must set - * U_TRUNCATED_CHAR_FOUND for truncated input, - * in addition to setting toULength/toUBytes[] - */ - if(cnv->toULength==0 && cnv->sharedData->impl->getNextUChar!=NULL) { - c=cnv->sharedData->impl->getNextUChar(&args, err); - *source=s=args.source; - if(*err==U_INDEX_OUTOFBOUNDS_ERROR) { - /* reset the converter without calling the callback function */ - _reset(cnv, UCNV_RESET_TO_UNICODE, FALSE); - return 0xffff; /* no output */ - } else if(U_SUCCESS(*err) && c>=0) { - return c; - /* - * else fall through to use _toUnicode() because - * 
UCNV_GET_NEXT_UCHAR_USE_TO_U: the native function did not want to handle it after all - * U_FAILURE: call _toUnicode() for callback handling (do not output c) - */ - } - } - - /* convert to one UChar in buffer[0], or handle getNextUChar() errors */ - _toUnicodeWithCallback(&args, err); - - if(*err==U_BUFFER_OVERFLOW_ERROR) { - *err=U_ZERO_ERROR; - } - - i=0; - length=(int32_t)(args.target-buffer); - } else { - /* write the lead surrogate from the overflow buffer */ - buffer[0]=(UChar)c; - args.target=buffer+1; - i=0; - length=1; - } - - /* buffer contents starts at i and ends before length */ - - if(U_FAILURE(*err)) { - c=0xffff; /* no output */ - } else if(length==0) { - /* no input or only state changes */ - *err=U_INDEX_OUTOFBOUNDS_ERROR; - /* no need to reset explicitly because _toUnicodeWithCallback() did it */ - c=0xffff; /* no output */ - } else { - c=buffer[0]; - i=1; - if(!U16_IS_LEAD(c)) { - /* consume c=buffer[0], done */ - } else { - /* got a lead surrogate, see if a trail surrogate follows */ - UChar c2; - - if(cnv->UCharErrorBufferLength>0) { - /* got overflow output from the conversion */ - if(U16_IS_TRAIL(c2=cnv->UCharErrorBuffer[0])) { - /* got a trail surrogate, too */ - c=U16_GET_SUPPLEMENTARY(c, c2); - - /* move the remaining overflow contents up to the beginning */ - if((--cnv->UCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer, cnv->UCharErrorBuffer+1, - cnv->UCharErrorBufferLength*U_SIZEOF_UCHAR); - } - } else { - /* c is an unpaired lead surrogate, just return it */ - } - } else if(args.source<sourceLimit) { - /* convert once more, to buffer[1] */ - args.targetLimit=buffer+2; - _toUnicodeWithCallback(&args, err); - if(*err==U_BUFFER_OVERFLOW_ERROR) { - *err=U_ZERO_ERROR; - } - - length=(int32_t)(args.target-buffer); - if(U_SUCCESS(*err) && length==2 && U16_IS_TRAIL(c2=buffer[1])) { - /* got a trail surrogate, too */ - c=U16_GET_SUPPLEMENTARY(c, c2); - i=2; - } - } - } - } - - /* - * move leftover output from buffer[i..length[ 
- * into the beginning of the overflow buffer - */ - if(i<length) { - /* move further overflow back */ - int32_t delta=length-i; - if((length=cnv->UCharErrorBufferLength)>0) { - uprv_memmove(cnv->UCharErrorBuffer+delta, cnv->UCharErrorBuffer, - length*U_SIZEOF_UCHAR); - } - cnv->UCharErrorBufferLength=(int8_t)(length+delta); - - cnv->UCharErrorBuffer[0]=buffer[i++]; - if(delta>1) { - cnv->UCharErrorBuffer[1]=buffer[i]; - } - } - - *source=args.source; - return c; -} - -/* ucnv_convert() and siblings ---------------------------------------------- */ - -U_CAPI void U_EXPORT2 -ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv, - char **target, const char *targetLimit, - const char **source, const char *sourceLimit, - UChar *pivotStart, UChar **pivotSource, - UChar **pivotTarget, const UChar *pivotLimit, - UBool reset, UBool flush, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - const UChar *myPivotSource; - UChar *myPivotTarget; - const char *s; - char *t; - - UConverterToUnicodeArgs toUArgs; - UConverterFromUnicodeArgs fromUArgs; - UConverterConvert convert; - - /* error checking */ - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return; - } - - if( targetCnv==NULL || sourceCnv==NULL || - source==NULL || *source==NULL || - target==NULL || *target==NULL || targetLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - s=*source; - t=*target; - if((sourceLimit!=NULL && sourceLimit<s) || targetLimit<t) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* - * Make sure that the buffer sizes do not exceed the number range for - * int32_t. See ucnv_toUnicode() for a more detailed comment. 
- */ - if( - (sourceLimit!=NULL && ((size_t)(sourceLimit-s)>(size_t)0x7fffffff && sourceLimit>s)) || - ((size_t)(targetLimit-t)>(size_t)0x7fffffff && targetLimit>t) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(pivotStart==NULL) { - if(!flush) { - /* streaming conversion requires an explicit pivot buffer */ - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - /* use the stack pivot buffer */ - myPivotSource=myPivotTarget=pivotStart=pivotBuffer; - pivotSource=(UChar **)&myPivotSource; - pivotTarget=&myPivotTarget; - pivotLimit=pivotBuffer+CHUNK_SIZE; - } else if( pivotStart>=pivotLimit || - pivotSource==NULL || *pivotSource==NULL || - pivotTarget==NULL || *pivotTarget==NULL || - pivotLimit==NULL - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - if(sourceLimit==NULL) { - /* get limit of single-byte-NUL-terminated source string */ - sourceLimit=uprv_strchr(*source, 0); - } - - if(reset) { - ucnv_resetToUnicode(sourceCnv); - ucnv_resetFromUnicode(targetCnv); - *pivotSource=*pivotTarget=pivotStart; - } else if(targetCnv->charErrorBufferLength>0) { - /* output the targetCnv overflow buffer */ - if(ucnv_outputOverflowFromUnicode(targetCnv, target, targetLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - return; - } - /* *target has moved, therefore stop using t */ - - if( !flush && - targetCnv->preFromULength>=0 && *pivotSource==*pivotTarget && - sourceCnv->UCharErrorBufferLength==0 && sourceCnv->preToULength>=0 && s==sourceLimit - ) { - /* the fromUnicode overflow buffer is emptied and there is no new input: we are done */ - return; - } - } - - /* Is direct-UTF-8 conversion available? 
*/ - if( sourceCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - targetCnv->sharedData->impl->fromUTF8!=NULL - ) { - convert=targetCnv->sharedData->impl->fromUTF8; - } else if( targetCnv->sharedData->staticData->conversionType==UCNV_UTF8 && - sourceCnv->sharedData->impl->toUTF8!=NULL - ) { - convert=sourceCnv->sharedData->impl->toUTF8; - } else { - convert=NULL; - } - - /* - * If direct-UTF-8 conversion is available, then we use a smaller - * pivot buffer for error handling and partial matches - * so that we quickly return to direct conversion. - * - * 32 is large enough for UCNV_EXT_MAX_UCHARS and UCNV_ERROR_BUFFER_LENGTH. - * - * We could reduce the pivot buffer size further, at the cost of - * buffer overflows from callbacks. - * The pivot buffer should not be smaller than the maximum number of - * fromUnicode extension table input UChars - * (for m:n conversion, see - * targetCnv->sharedData->mbcs.extIndexes[UCNV_EXT_COUNT_UCHARS]) - * or 2 for surrogate pairs. - * - * Too small a buffer can cause thrashing between pivoting and direct - * conversion, with function call overhead outweighing the benefits - * of direct conversion. - */ - if(convert!=NULL && (pivotLimit-pivotStart)>32) { - pivotLimit=pivotStart+32; - } - - /* prepare the converter arguments */ - fromUArgs.converter=targetCnv; - fromUArgs.flush=FALSE; - fromUArgs.offsets=NULL; - fromUArgs.target=*target; - fromUArgs.targetLimit=targetLimit; - fromUArgs.size=sizeof(fromUArgs); - - toUArgs.converter=sourceCnv; - toUArgs.flush=flush; - toUArgs.offsets=NULL; - toUArgs.source=s; - toUArgs.sourceLimit=sourceLimit; - toUArgs.targetLimit=pivotLimit; - toUArgs.size=sizeof(toUArgs); - - /* - * TODO: Consider separating this function into two functions, - * extracting exactly the conversion loop, - * for readability and to reduce the set of visible variables. - * - * Otherwise stop using s and t from here on. 
- */ - s=t=NULL; - - /* - * conversion loop - * - * The sequence of steps in the loop may appear backward, - * but the principle is simple: - * In the chain of - * source - sourceCnv overflow - pivot - targetCnv overflow - target - * empty out later buffers before refilling them from earlier ones. - * - * The targetCnv overflow buffer is flushed out only once before the loop. - */ - for(;;) { - /* - * if(pivot not empty or error or replay or flush fromUnicode) { - * fromUnicode(pivot -> target); - * } - * - * For pivoting conversion; and for direct conversion for - * error callback handling and flushing the replay buffer. - */ - if( *pivotSource<*pivotTarget || - U_FAILURE(*pErrorCode) || - targetCnv->preFromULength<0 || - fromUArgs.flush - ) { - fromUArgs.source=*pivotSource; - fromUArgs.sourceLimit=*pivotTarget; - _fromUnicodeWithCallback(&fromUArgs, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - /* target overflow, or conversion error */ - *pivotSource=(UChar *)fromUArgs.source; - break; - } - - /* - * _fromUnicodeWithCallback() must have consumed the pivot contents - * (*pivotSource==*pivotTarget) since it returned with U_SUCCESS() - */ - } - - /* The pivot buffer is empty; reset it so we start at pivotStart. */ - *pivotSource=*pivotTarget=pivotStart; - - /* - * if(sourceCnv overflow buffer not empty) { - * move(sourceCnv overflow buffer -> pivot); - * continue; - * } - */ - /* output the sourceCnv overflow buffer */ - if(sourceCnv->UCharErrorBufferLength>0) { - if(ucnv_outputOverflowToUnicode(sourceCnv, pivotTarget, pivotLimit, NULL, pErrorCode)) { - /* U_BUFFER_OVERFLOW_ERROR */ - *pErrorCode=U_ZERO_ERROR; - } - continue; - } - - /* - * check for end of input and break if done - * - * Checking both flush and fromUArgs.flush ensures that the converters - * have been called with the flush flag set if the ucnv_convertEx() - * caller set it. 
- */ - if( toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && sourceCnv->toULength==0 && - (!flush || fromUArgs.flush) - ) { - /* done successfully */ - break; - } - - /* - * use direct conversion if available - * but not if continuing a partial match - * or flushing the toUnicode replay buffer - */ - if(convert!=NULL && targetCnv->preFromUFirstCP<0 && sourceCnv->preToULength==0) { - if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* remove a warning that may be set by this function */ - *pErrorCode=U_ZERO_ERROR; - } - convert(&fromUArgs, &toUArgs, pErrorCode); - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - break; - } else if(U_FAILURE(*pErrorCode)) { - if(sourceCnv->toULength>0) { - /* - * Fall through to calling _toUnicodeWithCallback() - * for callback handling. - * - * The pivot buffer will be reset with - * *pivotSource=*pivotTarget=pivotStart; - * which indicates a toUnicode error to the caller - * (*pivotSource==pivotStart shows no pivot UChars consumed). - */ - } else { - /* - * Indicate a fromUnicode error to the caller - * (*pivotSource>pivotStart shows some pivot UChars consumed). - */ - *pivotSource=*pivotTarget=pivotStart+1; - /* - * Loop around to calling _fromUnicodeWithCallbacks() - * for callback handling. - */ - continue; - } - } else if(*pErrorCode==U_USING_DEFAULT_WARNING) { - /* - * No error, but the implementation requested to temporarily - * fall back to pivoting. - */ - *pErrorCode=U_ZERO_ERROR; - /* - * The following else branches are almost identical to the end-of-input - * handling in _toUnicodeWithCallback(). - * Avoid calling it just for the end of input. 
- */ - } else if(flush && sourceCnv->toULength>0) { /* flush==toUArgs.flush */ - /* - * the entire input stream is consumed - * and there is a partial, truncated input sequence left - */ - - /* inject an error and continue with callback handling */ - *pErrorCode=U_TRUNCATED_CHAR_FOUND; - } else { - /* input consumed */ - if(flush) { - /* reset the converters without calling the callback functions */ - _reset(sourceCnv, UCNV_RESET_TO_UNICODE, FALSE); - _reset(targetCnv, UCNV_RESET_FROM_UNICODE, FALSE); - } - - /* done successfully */ - break; - } - } - - /* - * toUnicode(source -> pivot); - * - * For pivoting conversion; and for direct conversion for - * error callback handling, continuing partial matches - * and flushing the replay buffer. - * - * The pivot buffer is empty and reset. - */ - toUArgs.target=pivotStart; /* ==*pivotTarget */ - /* toUArgs.targetLimit=pivotLimit; already set before the loop */ - _toUnicodeWithCallback(&toUArgs, pErrorCode); - *pivotTarget=toUArgs.target; - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) { - /* pivot overflow: continue with the conversion loop */ - *pErrorCode=U_ZERO_ERROR; - } else if(U_FAILURE(*pErrorCode) || (!flush && *pivotTarget==pivotStart)) { - /* conversion error, or there was nothing left to convert */ - break; - } - /* - * else: - * _toUnicodeWithCallback() wrote into the pivot buffer, - * continue with fromUnicode conversion. - * - * Set the fromUnicode flush flag if we flush and if toUnicode has - * processed the end of the input. 
- */ - if( flush && toUArgs.source==sourceLimit && - sourceCnv->preToULength>=0 && - sourceCnv->UCharErrorBufferLength==0 - ) { - fromUArgs.flush=TRUE; - } - } - - /* - * The conversion loop is exited when one of the following is true: - * - the entire source text has been converted successfully to the target buffer - * - a target buffer overflow occurred - * - a conversion error occurred - */ - - *source=toUArgs.source; - *target=fromUArgs.target; - - /* terminate the target buffer if possible */ - if(flush && U_SUCCESS(*pErrorCode)) { - if(*target!=targetLimit) { - **target=0; - if(*pErrorCode==U_STRING_NOT_TERMINATED_WARNING) { - *pErrorCode=U_ZERO_ERROR; - } - } else { - *pErrorCode=U_STRING_NOT_TERMINATED_WARNING; - } - } -} - -/* internal implementation of ucnv_convert() etc. with preflighting */ -static int32_t -ucnv_internalConvert(UConverter *outConverter, UConverter *inConverter, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UChar pivotBuffer[CHUNK_SIZE]; - UChar *pivot, *pivot2; - - char *myTarget; - const char *sourceLimit; - const char *targetLimit; - int32_t targetLength=0; - - /* set up */ - if(sourceLength<0) { - sourceLimit=uprv_strchr(source, 0); - } else { - sourceLimit=source+sourceLength; - } - - /* if there is no input data, we're done */ - if(source==sourceLimit) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - pivot=pivot2=pivotBuffer; - myTarget=target; - targetLength=0; - - if(targetCapacity>0) { - /* perform real conversion */ - targetLimit=target+targetCapacity; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength=(int32_t)(myTarget-target); - } - - /* - * If the output buffer is exhausted (or we are only "preflighting"), we need to stop writing - * to it but continue the conversion in order to store in 
targetCapacity - * the number of bytes that was required. - */ - if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR || targetCapacity==0) - { - char targetBuffer[CHUNK_SIZE]; - - targetLimit=targetBuffer+CHUNK_SIZE; - do { - *pErrorCode=U_ZERO_ERROR; - myTarget=targetBuffer; - ucnv_convertEx(outConverter, inConverter, - &myTarget, targetLimit, - &source, sourceLimit, - pivotBuffer, &pivot, &pivot2, pivotBuffer+CHUNK_SIZE, - FALSE, - TRUE, - pErrorCode); - targetLength+=(int32_t)(myTarget-targetBuffer); - } while(*pErrorCode==U_BUFFER_OVERFLOW_ERROR); - - /* done with preflighting, set warnings and errors as appropriate */ - return u_terminateChars(target, targetCapacity, targetLength, pErrorCode); - } - - /* no need to call u_terminateChars() because ucnv_convertEx() took care of that */ - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_convert(const char *toConverterName, const char *fromConverterName, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter in, out; /* stack-allocated */ - UConverter *inConverter, *outConverter; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the converters */ - inConverter=ucnv_createConverter(&in, fromConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - outConverter=ucnv_createConverter(&out, toConverterName, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - ucnv_close(inConverter); - return 0; - } - - targetLength=ucnv_internalConvert(outConverter, inConverter, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - 
ucnv_close(inConverter); - ucnv_close(outConverter); - - return targetLength; -} - -/* @internal */ -static int32_t -ucnv_convertAlgorithmic(UBool convertToAlgorithmic, - UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - UConverter algoConverterStatic; /* stack-allocated */ - UConverter *algoConverter, *to, *from; - int32_t targetLength; - - if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { - return 0; - } - - if( cnv==NULL || source==NULL || sourceLength<-1 || - targetCapacity<0 || (targetCapacity>0 && target==NULL) - ) { - *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - /* if there is no input data, we're done */ - if(sourceLength==0 || (sourceLength<0 && *source==0)) { - return u_terminateChars(target, targetCapacity, 0, pErrorCode); - } - - /* create the algorithmic converter */ - algoConverter=ucnv_createAlgorithmicConverter(&algoConverterStatic, algorithmicType, - "", 0, pErrorCode); - if(U_FAILURE(*pErrorCode)) { - return 0; - } - - /* reset the other converter */ - if(convertToAlgorithmic) { - /* cnv->Unicode->algo */ - ucnv_resetToUnicode(cnv); - to=algoConverter; - from=cnv; - } else { - /* algo->Unicode->cnv */ - ucnv_resetFromUnicode(cnv); - from=algoConverter; - to=cnv; - } - - targetLength=ucnv_internalConvert(to, from, - target, targetCapacity, - source, sourceLength, - pErrorCode); - - ucnv_close(algoConverter); - - return targetLength; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toAlgorithmic(UConverterType algorithmicType, - UConverter *cnv, - char *target, int32_t targetCapacity, - const char *source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(TRUE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI int32_t U_EXPORT2 -ucnv_fromAlgorithmic(UConverter *cnv, - UConverterType algorithmicType, - char *target, int32_t targetCapacity, - const char 
*source, int32_t sourceLength, - UErrorCode *pErrorCode) { - return ucnv_convertAlgorithmic(FALSE, algorithmicType, cnv, - target, targetCapacity, - source, sourceLength, - pErrorCode); -} - -U_CAPI UConverterType U_EXPORT2 -ucnv_getType(const UConverter* converter) -{ - int8_t type = converter->sharedData->staticData->conversionType; -#if !UCONFIG_NO_LEGACY_CONVERSION - if(type == UCNV_MBCS) { - return ucnv_MBCSGetType(converter); - } -#endif - return (UConverterType)type; -} - -U_CAPI void U_EXPORT2 -ucnv_getStarters(const UConverter* converter, - UBool starters[256], - UErrorCode* err) -{ - if (err == NULL || U_FAILURE(*err)) { - return; - } - - if(converter->sharedData->impl->getStarters != NULL) { - converter->sharedData->impl->getStarters(converter, starters, err); - } else { - *err = U_ILLEGAL_ARGUMENT_ERROR; - } -} - -static const UAmbiguousConverter *ucnv_getAmbiguous(const UConverter *cnv) -{ - UErrorCode errorCode; - const char *name; - int32_t i; - - if(cnv==NULL) { - return NULL; - } - - errorCode=U_ZERO_ERROR; - name=ucnv_getName(cnv, &errorCode); - if(U_FAILURE(errorCode)) { - return NULL; - } - - for(i=0; i<UPRV_LENGTHOF(ambiguousConverters); ++i) - { - if(0==uprv_strcmp(name, ambiguousConverters[i].name)) - { - return ambiguousConverters+i; - } - } - - return NULL; -} - -U_CAPI void U_EXPORT2 -ucnv_fixFileSeparator(const UConverter *cnv, - UChar* source, - int32_t sourceLength) { - const UAmbiguousConverter *a; - int32_t i; - UChar variant5c; - - if(cnv==NULL || source==NULL || sourceLength<=0 || (a=ucnv_getAmbiguous(cnv))==NULL) - { - return; - } - - variant5c=a->variant5c; - for(i=0; i<sourceLength; ++i) { - if(source[i]==variant5c) { - source[i]=0x5c; - } - } -} - -U_CAPI UBool U_EXPORT2 -ucnv_isAmbiguous(const UConverter *cnv) { - return (UBool)(ucnv_getAmbiguous(cnv)!=NULL); -} - -U_CAPI void U_EXPORT2 -ucnv_setFallback(UConverter *cnv, UBool usesFallback) -{ - cnv->useFallback = usesFallback; -} - -U_CAPI UBool U_EXPORT2 
-ucnv_usesFallback(const UConverter *cnv) -{ - return cnv->useFallback; -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidChars (const UConverter * converter, - char *errBytes, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - return; - } - if (len == NULL || errBytes == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidCharLength) > 0) - { - uprv_memcpy (errBytes, converter->invalidCharBuffer, *len); - } -} - -U_CAPI void U_EXPORT2 -ucnv_getInvalidUChars (const UConverter * converter, - UChar *errChars, - int8_t * len, - UErrorCode * err) -{ - if (err == NULL || U_FAILURE(*err)) - { - return; - } - if (len == NULL || errChars == NULL || converter == NULL) - { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (*len < converter->invalidUCharLength) - { - *err = U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - if ((*len = converter->invalidUCharLength) > 0) - { - u_memcpy (errChars, converter->invalidUCharBuffer, *len); - } -} - -#define SIG_MAX_LEN 5 - -U_CAPI const char* U_EXPORT2 -ucnv_detectUnicodeSignature( const char* source, - int32_t sourceLength, - int32_t* signatureLength, - UErrorCode* pErrorCode) { - int32_t dummy; - - /* initial 0xa5 bytes: make sure that if we read <SIG_MAX_LEN - * bytes we don't misdetect something - */ - char start[SIG_MAX_LEN]={ '\xa5', '\xa5', '\xa5', '\xa5', '\xa5' }; - int i = 0; - - if((pErrorCode==NULL) || U_FAILURE(*pErrorCode)){ - return NULL; - } - - if(source == NULL || sourceLength < -1){ - *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; - return NULL; - } - - if(signatureLength == NULL) { - signatureLength = &dummy; - } - - if(sourceLength==-1){ - sourceLength=(int32_t)uprv_strlen(source); - } - - - while(i<sourceLength&& i<SIG_MAX_LEN){ - start[i]=source[i]; - i++; - } - - if(start[0] == '\xFE' && start[1] == '\xFF') { - *signatureLength=2; - return 
"UTF-16BE"; - } else if(start[0] == '\xFF' && start[1] == '\xFE') { - if(start[2] == '\x00' && start[3] =='\x00') { - *signatureLength=4; - return "UTF-32LE"; - } else { - *signatureLength=2; - return "UTF-16LE"; - } - } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { - *signatureLength=3; - return "UTF-8"; - } else if(start[0] == '\x00' && start[1] == '\x00' && - start[2] == '\xFE' && start[3]=='\xFF') { - *signatureLength=4; - return "UTF-32BE"; - } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { - *signatureLength=3; - return "SCSU"; - } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { - *signatureLength=3; - return "BOCU-1"; - } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { - /* - * UTF-7: Initial U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ - * depending on the second UTF-16 code unit. - * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF - * if it occurs. 
- * - * So far we have +/v - */ - if(start[3] == '\x38' && start[4] == '\x2D') { - /* 5 bytes +/v8- */ - *signatureLength=5; - return "UTF-7"; - } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { - /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ - *signatureLength=4; - return "UTF-7"; - } - }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ - *signatureLength=4; - return "UTF-EBCDIC"; - } - - - /* no known Unicode signature byte sequence recognized */ - *signatureLength=0; - return NULL; -} - -U_CAPI int32_t U_EXPORT2 -ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) -{ - if(status == NULL || U_FAILURE(*status)){ - return -1; - } - if(cnv == NULL){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } - - if(cnv->preFromUFirstCP >= 0){ - return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; - }else if(cnv->preFromULength < 0){ - return -cnv->preFromULength ; - }else if(cnv->fromUChar32 > 0){ - return 1; - } + while(i<sourceLength&& i<SIG_MAX_LEN){ + start[i]=source[i]; + i++; + } + + if(start[0] == '\xFE' && start[1] == '\xFF') { + *signatureLength=2; + return "UTF-16BE"; + } else if(start[0] == '\xFF' && start[1] == '\xFE') { + if(start[2] == '\x00' && start[3] =='\x00') { + *signatureLength=4; + return "UTF-32LE"; + } else { + *signatureLength=2; + return "UTF-16LE"; + } + } else if(start[0] == '\xEF' && start[1] == '\xBB' && start[2] == '\xBF') { + *signatureLength=3; + return "UTF-8"; + } else if(start[0] == '\x00' && start[1] == '\x00' && + start[2] == '\xFE' && start[3]=='\xFF') { + *signatureLength=4; + return "UTF-32BE"; + } else if(start[0] == '\x0E' && start[1] == '\xFE' && start[2] == '\xFF') { + *signatureLength=3; + return "SCSU"; + } else if(start[0] == '\xFB' && start[1] == '\xEE' && start[2] == '\x28') { + *signatureLength=3; + return "BOCU-1"; + } else if(start[0] == '\x2B' && start[1] == '\x2F' && start[2] == '\x76') { + /* + * UTF-7: Initial 
U+FEFF is encoded as +/v8 or +/v9 or +/v+ or +/v/ + * depending on the second UTF-16 code unit. + * Detect the entire, closed Unicode mode sequence +/v8- for only U+FEFF + * if it occurs. + * + * So far we have +/v + */ + if(start[3] == '\x38' && start[4] == '\x2D') { + /* 5 bytes +/v8- */ + *signatureLength=5; + return "UTF-7"; + } else if(start[3] == '\x38' || start[3] == '\x39' || start[3] == '\x2B' || start[3] == '\x2F') { + /* 4 bytes +/v8 or +/v9 or +/v+ or +/v/ */ + *signatureLength=4; + return "UTF-7"; + } + }else if(start[0]=='\xDD' && start[1]== '\x73'&& start[2]=='\x66' && start[3]=='\x73'){ + *signatureLength=4; + return "UTF-EBCDIC"; + } + + + /* no known Unicode signature byte sequence recognized */ + *signatureLength=0; + return NULL; +} + +U_CAPI int32_t U_EXPORT2 +ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status) +{ + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preFromUFirstCP >= 0){ + return U16_LENGTH(cnv->preFromUFirstCP)+cnv->preFromULength ; + }else if(cnv->preFromULength < 0){ + return -cnv->preFromULength ; + }else if(cnv->fromUChar32 > 0){ + return 1; + } + return 0; + +} + +U_CAPI int32_t U_EXPORT2 +ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ + + if(status == NULL || U_FAILURE(*status)){ + return -1; + } + if(cnv == NULL){ + *status = U_ILLEGAL_ARGUMENT_ERROR; + return -1; + } + + if(cnv->preToULength > 0){ + return cnv->preToULength ; + }else if(cnv->preToULength < 0){ + return -cnv->preToULength; + }else if(cnv->toULength > 0){ + return cnv->toULength; + } return 0; - -} - -U_CAPI int32_t U_EXPORT2 -ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status){ - - if(status == NULL || U_FAILURE(*status)){ - return -1; - } - if(cnv == NULL){ - *status = U_ILLEGAL_ARGUMENT_ERROR; - return -1; - } - - if(cnv->preToULength > 0){ - return cnv->preToULength ; - }else if(cnv->preToULength < 0){ - return 
-cnv->preToULength; - }else if(cnv->toULength > 0){ - return cnv->toULength; - } - return 0; -} - -U_CAPI UBool U_EXPORT2 -ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ - if (U_FAILURE(*status)) { - return FALSE; - } - - if (cnv == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return FALSE; - } - - switch (ucnv_getType(cnv)) { - case UCNV_SBCS: - case UCNV_DBCS: - case UCNV_UTF32_BigEndian: - case UCNV_UTF32_LittleEndian: - case UCNV_UTF32: - case UCNV_US_ASCII: - return TRUE; - default: - return FALSE; - } -} -#endif - -/* - * Hey, Emacs, please set the following: - * - * Local Variables: - * indent-tabs-mode: nil - * End: - * - */ +} + +U_CAPI UBool U_EXPORT2 +ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status){ + if (U_FAILURE(*status)) { + return FALSE; + } + + if (cnv == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + + switch (ucnv_getType(cnv)) { + case UCNV_SBCS: + case UCNV_DBCS: + case UCNV_UTF32_BigEndian: + case UCNV_UTF32_LittleEndian: + case UCNV_UTF32: + case UCNV_US_ASCII: + return TRUE; + default: + return FALSE; + } +} +#endif + +/* + * Hey, Emacs, please set the following: + * + * Local Variables: + * indent-tabs-mode: nil + * End: + * + */ |