diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:15 +0300 |
commit | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (patch) | |
tree | 2768b1223e96a8a0610a93d18425d9647c1123c8 /contrib/libs/icu/io/ustdio.cpp | |
parent | 60040c91ffe701a84689b2c6310ff845e65cff42 (diff) | |
download | ydb-e9d19cec64684c9c1e6b0c98297e5b895cf904fe.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/icu/io/ustdio.cpp')
-rw-r--r-- | contrib/libs/icu/io/ustdio.cpp | 1462 |
1 files changed, 731 insertions, 731 deletions
diff --git a/contrib/libs/icu/io/ustdio.cpp b/contrib/libs/icu/io/ustdio.cpp index d6eb5cf9ec..91f0cd2cf2 100644 --- a/contrib/libs/icu/io/ustdio.cpp +++ b/contrib/libs/icu/io/ustdio.cpp @@ -1,732 +1,732 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ****************************************************************************** - * - * Copyright (C) 1998-2016, International Business Machines - * Corporation and others. All Rights Reserved. - * - ****************************************************************************** - * - * File ustdio.c - * - * Modification History: - * - * Date Name Description - * 11/18/98 stephen Creation. - * 03/12/99 stephen Modified for new C API. - * 07/19/99 stephen Fixed read() and gets() - ****************************************************************************** - */ - -#include "unicode/ustdio.h" - -#if !UCONFIG_NO_CONVERSION - -#include "unicode/putil.h" -#include "cmemory.h" -#include "cstring.h" -#include "ufile.h" -#include "ufmt_cmn.h" -#include "unicode/ucnv.h" -#include "unicode/ustring.h" - -#include <string.h> - -#define DELIM_LF 0x000A -#define DELIM_VT 0x000B -#define DELIM_FF 0x000C -#define DELIM_CR 0x000D -#define DELIM_NEL 0x0085 -#define DELIM_LS 0x2028 -#define DELIM_PS 0x2029 - -/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ -#if U_PLATFORM_USES_ONLY_WIN32_API -static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; -static const uint32_t DELIMITERS_LEN = 2; -/* TODO: Default newline writing should be detected based upon the converter being used. */ -#else -static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; -static const uint32_t DELIMITERS_LEN = 1; -#endif - -#define IS_FIRST_STRING_DELIMITER(c1) \ - (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ - || (c1) == DELIM_NEL \ - || (c1) == DELIM_LS \ - || (c1) == DELIM_PS) -#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) -#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ - (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) - - -#if !UCONFIG_NO_TRANSLITERATION - -U_CAPI UTransliterator* U_EXPORT2 -u_fsettransliterator(UFILE *file, UFileDirection direction, - UTransliterator *adopt, UErrorCode *status) -{ - UTransliterator *old = NULL; - - if(U_FAILURE(*status)) - { - return adopt; - } - - if(!file) - { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return adopt; - } - - if(direction & U_READ) - { - /** TODO: implement */ - *status = U_UNSUPPORTED_ERROR; - return adopt; - } - - if(adopt == NULL) /* they are clearing it */ - { - if(file->fTranslit != NULL) - { - /* TODO: Check side */ - old = file->fTranslit->translit; - uprv_free(file->fTranslit->buffer); - file->fTranslit->buffer=NULL; - uprv_free(file->fTranslit); - file->fTranslit=NULL; - } - } - else - { - if(file->fTranslit == NULL) - { - file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); - if(!file->fTranslit) - { - *status = U_MEMORY_ALLOCATION_ERROR; - return adopt; - } - file->fTranslit->capacity = 0; - file->fTranslit->length = 0; - file->fTranslit->pos = 0; - file->fTranslit->buffer = NULL; - } - else - { - old = file->fTranslit->translit; - ufile_flush_translit(file); - } - - file->fTranslit->translit = adopt; - } - - return old; -} - -static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) -{ - int32_t newlen; - int32_t junkCount = 0; - int32_t textLength; - int32_t textLimit; - UTransPosition pos; - UErrorCode status = U_ZERO_ERROR; - - if(count == NULL) - { - count = &junkCount; - } - - if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) - { - /* fast path */ - return src; - } - - /* First: slide over everything */ - if(f->fTranslit->length > f->fTranslit->pos) - { - memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, - (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); - } - f->fTranslit->length -= f->fTranslit->pos; /* always */ - f->fTranslit->pos = 0; - - /* Calculate new buffer size needed */ - newlen = (*count + f->fTranslit->length) * 4; - - if(newlen > f->fTranslit->capacity) - { - if(f->fTranslit->buffer == NULL) - { - f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); - } - else - { - f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); - } - /* Check for malloc/realloc failure. */ - if (f->fTranslit->buffer == NULL) { - return NULL; - } - f->fTranslit->capacity = newlen; - } - - /* Now, copy any data over */ - u_strncpy(f->fTranslit->buffer + f->fTranslit->length, - src, - *count); - f->fTranslit->length += *count; - - /* Now, translit in place as much as we can */ - if(flush == FALSE) - { - textLength = f->fTranslit->length; - pos.contextStart = 0; - pos.contextLimit = textLength; - pos.start = 0; - pos.limit = textLength; - - utrans_transIncrementalUChars(f->fTranslit->translit, - f->fTranslit->buffer, /* because we shifted */ - &textLength, - f->fTranslit->capacity, - &pos, - &status); - - /* now: start/limit point to the transliterated text */ - /* Transliterated is [buffer..pos.start) */ - *count = pos.start; - f->fTranslit->pos = pos.start; - f->fTranslit->length = pos.limit; - - return f->fTranslit->buffer; - } - else - { - textLength = f->fTranslit->length; - textLimit = f->fTranslit->length; - - utrans_transUChars(f->fTranslit->translit, - f->fTranslit->buffer, - &textLength, - f->fTranslit->capacity, - 0, - &textLimit, - &status); - - /* out: converted len */ - *count = textLimit; - - /* Set pointers to 0 */ - f->fTranslit->pos = 0; - f->fTranslit->length = 0; - - return f->fTranslit->buffer; - } -} - -#endif - -void -ufile_flush_translit(UFILE *f) -{ -#if !UCONFIG_NO_TRANSLITERATION - if((!f)||(!f->fTranslit)) - return; -#endif - - u_file_write_flush(NULL, 0, f, FALSE, TRUE); -} - - -void -ufile_flush_io(UFILE *f) -{ - if((!f) || (!f->fFile)) { - return; /* skip if no file */ - } - - u_file_write_flush(NULL, 0, f, TRUE, FALSE); -} - - -void -ufile_close_translit(UFILE *f) -{ -#if !UCONFIG_NO_TRANSLITERATION - if((!f)||(!f->fTranslit)) - return; -#endif - - ufile_flush_translit(f); - -#if !UCONFIG_NO_TRANSLITERATION - if(f->fTranslit->translit) - utrans_close(f->fTranslit->translit); - - if(f->fTranslit->buffer) - { - uprv_free(f->fTranslit->buffer); - } - - uprv_free(f->fTranslit); - f->fTranslit = NULL; -#endif -} - - -/* Input/output */ - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fputs(const UChar *s, - UFILE *f) -{ - int32_t count = u_file_write(s, u_strlen(s), f); - count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); - return count; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fputc(UChar32 uc, - UFILE *f) -{ - UChar buf[2]; - int32_t idx = 0; - UBool isError = FALSE; - - U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); - if (isError) { - return U_EOF; - } - return u_file_write(buf, idx, f) == idx ? uc : U_EOF; -} - - -U_CFUNC int32_t U_EXPORT2 -u_file_write_flush(const UChar *chars, - int32_t count, - UFILE *f, - UBool flushIO, - UBool flushTranslit) -{ - /* Set up conversion parameters */ - UErrorCode status = U_ZERO_ERROR; - const UChar *mySource = chars; - const UChar *mySourceBegin; - const UChar *mySourceEnd; - char charBuffer[UFILE_CHARBUFFER_SIZE]; - char *myTarget = charBuffer; - int32_t written = 0; - int32_t numConverted = 0; - - if (count < 0) { - count = u_strlen(chars); - } - -#if !UCONFIG_NO_TRANSLITERATION - if((f->fTranslit) && (f->fTranslit->translit)) - { - /* Do the transliteration */ - mySource = u_file_translit(f, chars, &count, flushTranslit); - } -#endif - - /* Write to a string. */ - if (!f->fFile) { - int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); - if (flushIO && charsLeft > count) { - count++; - } - written = ufmt_min(count, charsLeft); - u_strncpy(f->str.fPos, mySource, written); - f->str.fPos += written; - return written; - } - - mySourceEnd = mySource + count; - - /* Perform the conversion in a loop */ - do { - mySourceBegin = mySource; /* beginning location for this loop */ - status = U_ZERO_ERROR; - if(f->fConverter != NULL) { /* We have a valid converter */ - ucnv_fromUnicode(f->fConverter, - &myTarget, - charBuffer + UFILE_CHARBUFFER_SIZE, - &mySource, - mySourceEnd, - NULL, - flushIO, - &status); - } else { /*weiv: do the invariant conversion */ - int32_t convertChars = (int32_t) (mySourceEnd - mySource); - if (convertChars > UFILE_CHARBUFFER_SIZE) { - convertChars = UFILE_CHARBUFFER_SIZE; - status = U_BUFFER_OVERFLOW_ERROR; +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ****************************************************************************** + * + * Copyright (C) 1998-2016, International Business Machines + * Corporation and others. All Rights Reserved. + * + ****************************************************************************** + * + * File ustdio.c + * + * Modification History: + * + * Date Name Description + * 11/18/98 stephen Creation. + * 03/12/99 stephen Modified for new C API. + * 07/19/99 stephen Fixed read() and gets() + ****************************************************************************** + */ + +#include "unicode/ustdio.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "cmemory.h" +#include "cstring.h" +#include "ufile.h" +#include "ufmt_cmn.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +#include <string.h> + +#define DELIM_LF 0x000A +#define DELIM_VT 0x000B +#define DELIM_FF 0x000C +#define DELIM_CR 0x000D +#define DELIM_NEL 0x0085 +#define DELIM_LS 0x2028 +#define DELIM_PS 0x2029 + +/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ +#if U_PLATFORM_USES_ONLY_WIN32_API +static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 2; +/* TODO: Default newline writing should be detected based upon the converter being used. */ +#else +static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 }; +static const uint32_t DELIMITERS_LEN = 1; +#endif + +#define IS_FIRST_STRING_DELIMITER(c1) \ + (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ + || (c1) == DELIM_NEL \ + || (c1) == DELIM_LS \ + || (c1) == DELIM_PS) +#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) +#define IS_COMBINED_STRING_DELIMITER(c1, c2) \ + (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) + + +#if !UCONFIG_NO_TRANSLITERATION + +U_CAPI UTransliterator* U_EXPORT2 +u_fsettransliterator(UFILE *file, UFileDirection direction, + UTransliterator *adopt, UErrorCode *status) +{ + UTransliterator *old = NULL; + + if(U_FAILURE(*status)) + { + return adopt; + } + + if(!file) + { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return adopt; + } + + if(direction & U_READ) + { + /** TODO: implement */ + *status = U_UNSUPPORTED_ERROR; + return adopt; + } + + if(adopt == NULL) /* they are clearing it */ + { + if(file->fTranslit != NULL) + { + /* TODO: Check side */ + old = file->fTranslit->translit; + uprv_free(file->fTranslit->buffer); + file->fTranslit->buffer=NULL; + uprv_free(file->fTranslit); + file->fTranslit=NULL; + } + } + else + { + if(file->fTranslit == NULL) + { + file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer)); + if(!file->fTranslit) + { + *status = U_MEMORY_ALLOCATION_ERROR; + return adopt; } - u_UCharsToChars(mySource, myTarget, convertChars); - mySource += convertChars; - myTarget += convertChars; - } - numConverted = (int32_t)(myTarget - charBuffer); - - if (numConverted > 0) { - /* write the converted bytes */ - fwrite(charBuffer, - sizeof(char), - numConverted, - f->fFile); - - written += (int32_t) (mySource - mySourceBegin); - } - myTarget = charBuffer; - } - while(status == U_BUFFER_OVERFLOW_ERROR); - - /* return # of chars written */ - return written; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_file_write( const UChar *chars, - int32_t count, - UFILE *f) -{ - return u_file_write_flush(chars,count,f,FALSE,FALSE); -} - - -/* private function used for buffering input */ -void -ufile_fill_uchar_buffer(UFILE *f) -{ - UErrorCode status; - const char *mySource; - const char *mySourceEnd; - UChar *myTarget; - int32_t bufferSize; - int32_t maxCPBytes; - int32_t bytesRead; - int32_t availLength; - int32_t dataSize; - char charBuffer[UFILE_CHARBUFFER_SIZE]; - u_localized_string *str; - - if (f->fFile == NULL) { - /* There is nothing to do. It's a string. */ - return; - } - - str = &f->str; - dataSize = (int32_t)(str->fLimit - str->fPos); - if (f->fFileno == 0 && dataSize > 0) { - /* Don't read from stdin too many times. There is still some data. */ - return; - } - - /* shift the buffer if it isn't empty */ - if(dataSize != 0) { - u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ - } - - - /* record how much buffer space is available */ - availLength = UFILE_UCHARBUFFER_SIZE - dataSize; - - /* Determine the # of codepage bytes needed to fill our UChar buffer */ - /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ - maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); - - /* Read in the data to convert */ - if (f->fFileno == 0) { - /* Special case. Read from stdin one line at a time. */ - char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); - bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); - } - else { - /* A normal file */ - bytesRead = (int32_t)fread(charBuffer, - sizeof(char), - ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), - f->fFile); - } - - /* Set up conversion parameters */ - status = U_ZERO_ERROR; - mySource = charBuffer; - mySourceEnd = charBuffer + bytesRead; - myTarget = f->fUCBuffer + dataSize; - bufferSize = UFILE_UCHARBUFFER_SIZE; - - if(f->fConverter != NULL) { /* We have a valid converter */ - /* Perform the conversion */ - ucnv_toUnicode(f->fConverter, - &myTarget, - f->fUCBuffer + bufferSize, - &mySource, - mySourceEnd, - NULL, - (UBool)(feof(f->fFile) != 0), - &status); - - } else { /*weiv: do the invariant conversion */ - u_charsToUChars(mySource, myTarget, bytesRead); - myTarget += bytesRead; - } - - /* update the pointers into our array */ - str->fPos = str->fBuffer; - str->fLimit = myTarget; -} - -U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgets(UChar *s, - int32_t n, - UFILE *f) -{ - int32_t dataSize; - int32_t count; - UChar *alias; - const UChar *limit; - UChar *sItr; - UChar currDelim = 0; - u_localized_string *str; - - if (n <= 0) { - /* Caller screwed up. We need to write the null terminatior. */ - return NULL; - } - - /* fill the buffer if needed */ - str = &f->str; - if (str->fPos >= str->fLimit) { - ufile_fill_uchar_buffer(f); - } - - /* subtract 1 from n to compensate for the terminator */ - --n; - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - - /* if 0 characters were left, return 0 */ - if (dataSize == 0) - return NULL; - - /* otherwise, iteratively fill the buffer and copy */ - count = 0; - sItr = s; - currDelim = 0; - while (dataSize > 0 && count < n) { - alias = str->fPos; - - /* Find how much to copy */ - if (dataSize < (n - count)) { - limit = str->fLimit; - } - else { - limit = alias + (n - count); - } - - if (!currDelim) { - /* Copy UChars until we find the first occurrence of a delimiter character */ - while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { - count++; - *(sItr++) = *(alias++); - } - /* Preserve the newline */ - if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { - if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { - currDelim = *alias; - } - else { - currDelim = 1; /* This isn't a newline, but it's used to say - that we should break later. We've checked all - possible newline combinations even across buffer - boundaries. */ - } - count++; - *(sItr++) = *(alias++); - } - } - /* If we have a CRLF combination, preserve that too. */ - if (alias < limit) { - if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { - count++; - *(sItr++) = *(alias++); - } - currDelim = 1; /* This isn't a newline, but it's used to say - that we should break later. We've checked all - possible newline combinations even across buffer - boundaries. */ - } - - /* update the current buffer position */ - str->fPos = alias; - - /* if we found a delimiter */ - if (currDelim == 1) { - /* break out */ - break; - } - - /* refill the buffer */ - ufile_fill_uchar_buffer(f); - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - } - - /* add the terminator and return s */ - *sItr = 0x0000; - return s; -} - -U_CFUNC UBool U_EXPORT2 -ufile_getch(UFILE *f, UChar *ch) -{ - UBool isValidChar = FALSE; - - *ch = U_EOF; - /* if we have an available character in the buffer, return it */ - if(f->str.fPos < f->str.fLimit){ - *ch = *(f->str.fPos)++; - isValidChar = TRUE; - } - else { - /* otherwise, fill the buffer and return the next character */ - if(f->str.fPos >= f->str.fLimit) { - ufile_fill_uchar_buffer(f); - } - if(f->str.fPos < f->str.fLimit) { - *ch = *(f->str.fPos)++; - isValidChar = TRUE; - } - } - return isValidChar; -} - -U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetc(UFILE *f) -{ - UChar ch; - ufile_getch(f, &ch); - return ch; -} - -U_CFUNC UBool U_EXPORT2 -ufile_getch32(UFILE *f, UChar32 *c32) -{ - UBool isValidChar = FALSE; - u_localized_string *str; - - *c32 = U_EOF; - - /* Fill the buffer if it is empty */ - str = &f->str; - if (f && str->fPos + 1 >= str->fLimit) { - ufile_fill_uchar_buffer(f); - } - - /* Get the next character in the buffer */ - if (str->fPos < str->fLimit) { - *c32 = *(str->fPos)++; - if (U_IS_LEAD(*c32)) { - if (str->fPos < str->fLimit) { - UChar c16 = *(str->fPos)++; - *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); - isValidChar = TRUE; - } - else { - *c32 = U_EOF; - } - } - else { - isValidChar = TRUE; - } - } - - return isValidChar; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fgetcx(UFILE *f) -{ - UChar32 ch; - ufile_getch32(f, &ch); - return ch; -} - -U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_fungetc(UChar32 ch, - UFILE *f) -{ - u_localized_string *str; - - str = &f->str; - - /* if we're at the beginning of the buffer, sorry! */ - if (str->fPos == str->fBuffer - || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) - { - ch = U_EOF; - } - else { - /* otherwise, put the character back */ - /* Remember, read them back on in the reverse order. */ - if (U_IS_LEAD(ch)) { - if (*--(str->fPos) != U16_TRAIL(ch) - || *--(str->fPos) != U16_LEAD(ch)) - { - ch = U_EOF; - } - } - else if (*--(str->fPos) != ch) { - ch = U_EOF; - } - } - return ch; -} - -U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ -u_file_read( UChar *chars, - int32_t count, - UFILE *f) -{ - int32_t dataSize; - int32_t read = 0; - u_localized_string *str = &f->str; - - do { - - /* determine the amount of data in the buffer */ - dataSize = (int32_t)(str->fLimit - str->fPos); - if (dataSize <= 0) { - /* fill the buffer */ - ufile_fill_uchar_buffer(f); - dataSize = (int32_t)(str->fLimit - str->fPos); - } - - /* Make sure that we don't read too much */ - if (dataSize > (count - read)) { - dataSize = count - read; - } - - /* copy the current data in the buffer */ - memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); - - /* update number of items read */ - read += dataSize; - - /* update the current buffer position */ - str->fPos += dataSize; - } - while (dataSize != 0 && read < count); - - return read; -} -#endif + file->fTranslit->capacity = 0; + file->fTranslit->length = 0; + file->fTranslit->pos = 0; + file->fTranslit->buffer = NULL; + } + else + { + old = file->fTranslit->translit; + ufile_flush_translit(file); + } + + file->fTranslit->translit = adopt; + } + + return old; +} + +static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush) +{ + int32_t newlen; + int32_t junkCount = 0; + int32_t textLength; + int32_t textLimit; + UTransPosition pos; + UErrorCode status = U_ZERO_ERROR; + + if(count == NULL) + { + count = &junkCount; + } + + if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit)) + { + /* fast path */ + return src; + } + + /* First: slide over everything */ + if(f->fTranslit->length > f->fTranslit->pos) + { + memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos, + (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar)); + } + f->fTranslit->length -= f->fTranslit->pos; /* always */ + f->fTranslit->pos = 0; + + /* Calculate new buffer size needed */ + newlen = (*count + f->fTranslit->length) * 4; + + if(newlen > f->fTranslit->capacity) + { + if(f->fTranslit->buffer == NULL) + { + f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar)); + } + else + { + f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar)); + } + /* Check for malloc/realloc failure. */ + if (f->fTranslit->buffer == NULL) { + return NULL; + } + f->fTranslit->capacity = newlen; + } + + /* Now, copy any data over */ + u_strncpy(f->fTranslit->buffer + f->fTranslit->length, + src, + *count); + f->fTranslit->length += *count; + + /* Now, translit in place as much as we can */ + if(flush == FALSE) + { + textLength = f->fTranslit->length; + pos.contextStart = 0; + pos.contextLimit = textLength; + pos.start = 0; + pos.limit = textLength; + + utrans_transIncrementalUChars(f->fTranslit->translit, + f->fTranslit->buffer, /* because we shifted */ + &textLength, + f->fTranslit->capacity, + &pos, + &status); + + /* now: start/limit point to the transliterated text */ + /* Transliterated is [buffer..pos.start) */ + *count = pos.start; + f->fTranslit->pos = pos.start; + f->fTranslit->length = pos.limit; + + return f->fTranslit->buffer; + } + else + { + textLength = f->fTranslit->length; + textLimit = f->fTranslit->length; + + utrans_transUChars(f->fTranslit->translit, + f->fTranslit->buffer, + &textLength, + f->fTranslit->capacity, + 0, + &textLimit, + &status); + + /* out: converted len */ + *count = textLimit; + + /* Set pointers to 0 */ + f->fTranslit->pos = 0; + f->fTranslit->length = 0; + + return f->fTranslit->buffer; + } +} + +#endif + +void +ufile_flush_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + u_file_write_flush(NULL, 0, f, FALSE, TRUE); +} + + +void +ufile_flush_io(UFILE *f) +{ + if((!f) || (!f->fFile)) { + return; /* skip if no file */ + } + + u_file_write_flush(NULL, 0, f, TRUE, FALSE); +} + + +void +ufile_close_translit(UFILE *f) +{ +#if !UCONFIG_NO_TRANSLITERATION + if((!f)||(!f->fTranslit)) + return; +#endif + + ufile_flush_translit(f); + +#if !UCONFIG_NO_TRANSLITERATION + if(f->fTranslit->translit) + utrans_close(f->fTranslit->translit); + + if(f->fTranslit->buffer) + { + uprv_free(f->fTranslit->buffer); + } + + uprv_free(f->fTranslit); + f->fTranslit = NULL; +#endif +} + + +/* Input/output */ + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputs(const UChar *s, + UFILE *f) +{ + int32_t count = u_file_write(s, u_strlen(s), f); + count += u_file_write(DELIMITERS, DELIMITERS_LEN, f); + return count; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fputc(UChar32 uc, + UFILE *f) +{ + UChar buf[2]; + int32_t idx = 0; + UBool isError = FALSE; + + U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError); + if (isError) { + return U_EOF; + } + return u_file_write(buf, idx, f) == idx ? uc : U_EOF; +} + + +U_CFUNC int32_t U_EXPORT2 +u_file_write_flush(const UChar *chars, + int32_t count, + UFILE *f, + UBool flushIO, + UBool flushTranslit) +{ + /* Set up conversion parameters */ + UErrorCode status = U_ZERO_ERROR; + const UChar *mySource = chars; + const UChar *mySourceBegin; + const UChar *mySourceEnd; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + char *myTarget = charBuffer; + int32_t written = 0; + int32_t numConverted = 0; + + if (count < 0) { + count = u_strlen(chars); + } + +#if !UCONFIG_NO_TRANSLITERATION + if((f->fTranslit) && (f->fTranslit->translit)) + { + /* Do the transliteration */ + mySource = u_file_translit(f, chars, &count, flushTranslit); + } +#endif + + /* Write to a string. */ + if (!f->fFile) { + int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos); + if (flushIO && charsLeft > count) { + count++; + } + written = ufmt_min(count, charsLeft); + u_strncpy(f->str.fPos, mySource, written); + f->str.fPos += written; + return written; + } + + mySourceEnd = mySource + count; + + /* Perform the conversion in a loop */ + do { + mySourceBegin = mySource; /* beginning location for this loop */ + status = U_ZERO_ERROR; + if(f->fConverter != NULL) { /* We have a valid converter */ + ucnv_fromUnicode(f->fConverter, + &myTarget, + charBuffer + UFILE_CHARBUFFER_SIZE, + &mySource, + mySourceEnd, + NULL, + flushIO, + &status); + } else { /*weiv: do the invariant conversion */ + int32_t convertChars = (int32_t) (mySourceEnd - mySource); + if (convertChars > UFILE_CHARBUFFER_SIZE) { + convertChars = UFILE_CHARBUFFER_SIZE; + status = U_BUFFER_OVERFLOW_ERROR; + } + u_UCharsToChars(mySource, myTarget, convertChars); + mySource += convertChars; + myTarget += convertChars; + } + numConverted = (int32_t)(myTarget - charBuffer); + + if (numConverted > 0) { + /* write the converted bytes */ + fwrite(charBuffer, + sizeof(char), + numConverted, + f->fFile); + + written += (int32_t) (mySource - mySourceBegin); + } + myTarget = charBuffer; + } + while(status == U_BUFFER_OVERFLOW_ERROR); + + /* return # of chars written */ + return written; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_write( const UChar *chars, + int32_t count, + UFILE *f) +{ + return u_file_write_flush(chars,count,f,FALSE,FALSE); +} + + +/* private function used for buffering input */ +void +ufile_fill_uchar_buffer(UFILE *f) +{ + UErrorCode status; + const char *mySource; + const char *mySourceEnd; + UChar *myTarget; + int32_t bufferSize; + int32_t maxCPBytes; + int32_t bytesRead; + int32_t availLength; + int32_t dataSize; + char charBuffer[UFILE_CHARBUFFER_SIZE]; + u_localized_string *str; + + if (f->fFile == NULL) { + /* There is nothing to do. It's a string. */ + return; + } + + str = &f->str; + dataSize = (int32_t)(str->fLimit - str->fPos); + if (f->fFileno == 0 && dataSize > 0) { + /* Don't read from stdin too many times. There is still some data. */ + return; + } + + /* shift the buffer if it isn't empty */ + if(dataSize != 0) { + u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */ + } + + + /* record how much buffer space is available */ + availLength = UFILE_UCHARBUFFER_SIZE - dataSize; + + /* Determine the # of codepage bytes needed to fill our UChar buffer */ + /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ + maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1); + + /* Read in the data to convert */ + if (f->fFileno == 0) { + /* Special case. Read from stdin one line at a time. */ + char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile); + bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0); + } + else { + /* A normal file */ + bytesRead = (int32_t)fread(charBuffer, + sizeof(char), + ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), + f->fFile); + } + + /* Set up conversion parameters */ + status = U_ZERO_ERROR; + mySource = charBuffer; + mySourceEnd = charBuffer + bytesRead; + myTarget = f->fUCBuffer + dataSize; + bufferSize = UFILE_UCHARBUFFER_SIZE; + + if(f->fConverter != NULL) { /* We have a valid converter */ + /* Perform the conversion */ + ucnv_toUnicode(f->fConverter, + &myTarget, + f->fUCBuffer + bufferSize, + &mySource, + mySourceEnd, + NULL, + (UBool)(feof(f->fFile) != 0), + &status); + + } else { /*weiv: do the invariant conversion */ + u_charsToUChars(mySource, myTarget, bytesRead); + myTarget += bytesRead; + } + + /* update the pointers into our array */ + str->fPos = str->fBuffer; + str->fLimit = myTarget; +} + +U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgets(UChar *s, + int32_t n, + UFILE *f) +{ + int32_t dataSize; + int32_t count; + UChar *alias; + const UChar *limit; + UChar *sItr; + UChar currDelim = 0; + u_localized_string *str; + + if (n <= 0) { + /* Caller screwed up. We need to write the null terminatior. */ + return NULL; + } + + /* fill the buffer if needed */ + str = &f->str; + if (str->fPos >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* subtract 1 from n to compensate for the terminator */ + --n; + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + + /* if 0 characters were left, return 0 */ + if (dataSize == 0) + return NULL; + + /* otherwise, iteratively fill the buffer and copy */ + count = 0; + sItr = s; + currDelim = 0; + while (dataSize > 0 && count < n) { + alias = str->fPos; + + /* Find how much to copy */ + if (dataSize < (n - count)) { + limit = str->fLimit; + } + else { + limit = alias + (n - count); + } + + if (!currDelim) { + /* Copy UChars until we find the first occurrence of a delimiter character */ + while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) { + count++; + *(sItr++) = *(alias++); + } + /* Preserve the newline */ + if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) { + if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) { + currDelim = *alias; + } + else { + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + count++; + *(sItr++) = *(alias++); + } + } + /* If we have a CRLF combination, preserve that too. */ + if (alias < limit) { + if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) { + count++; + *(sItr++) = *(alias++); + } + currDelim = 1; /* This isn't a newline, but it's used to say + that we should break later. We've checked all + possible newline combinations even across buffer + boundaries. */ + } + + /* update the current buffer position */ + str->fPos = alias; + + /* if we found a delimiter */ + if (currDelim == 1) { + /* break out */ + break; + } + + /* refill the buffer */ + ufile_fill_uchar_buffer(f); + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* add the terminator and return s */ + *sItr = 0x0000; + return s; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch(UFILE *f, UChar *ch) +{ + UBool isValidChar = FALSE; + + *ch = U_EOF; + /* if we have an available character in the buffer, return it */ + if(f->str.fPos < f->str.fLimit){ + *ch = *(f->str.fPos)++; + isValidChar = TRUE; + } + else { + /* otherwise, fill the buffer and return the next character */ + if(f->str.fPos >= f->str.fLimit) { + ufile_fill_uchar_buffer(f); + } + if(f->str.fPos < f->str.fLimit) { + *ch = *(f->str.fPos)++; + isValidChar = TRUE; + } + } + return isValidChar; +} + +U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetc(UFILE *f) +{ + UChar ch; + ufile_getch(f, &ch); + return ch; +} + +U_CFUNC UBool U_EXPORT2 +ufile_getch32(UFILE *f, UChar32 *c32) +{ + UBool isValidChar = FALSE; + u_localized_string *str; + + *c32 = U_EOF; + + /* Fill the buffer if it is empty */ + str = &f->str; + if (f && str->fPos + 1 >= str->fLimit) { + ufile_fill_uchar_buffer(f); + } + + /* Get the next character in the buffer */ + if (str->fPos < str->fLimit) { + *c32 = *(str->fPos)++; + if (U_IS_LEAD(*c32)) { + if (str->fPos < str->fLimit) { + UChar c16 = *(str->fPos)++; + *c32 = U16_GET_SUPPLEMENTARY(*c32, c16); + isValidChar = TRUE; + } + else { + *c32 = U_EOF; + } + } + else { + isValidChar = TRUE; + } + } + + return isValidChar; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fgetcx(UFILE *f) +{ + UChar32 ch; + ufile_getch32(f, &ch); + return ch; +} + +U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_fungetc(UChar32 ch, + UFILE *f) +{ + u_localized_string *str; + + str = &f->str; + + /* if we're at the beginning of the buffer, sorry! */ + if (str->fPos == str->fBuffer + || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer)) + { + ch = U_EOF; + } + else { + /* otherwise, put the character back */ + /* Remember, read them back on in the reverse order. */ + if (U_IS_LEAD(ch)) { + if (*--(str->fPos) != U16_TRAIL(ch) + || *--(str->fPos) != U16_LEAD(ch)) + { + ch = U_EOF; + } + } + else if (*--(str->fPos) != ch) { + ch = U_EOF; + } + } + return ch; +} + +U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ +u_file_read( UChar *chars, + int32_t count, + UFILE *f) +{ + int32_t dataSize; + int32_t read = 0; + u_localized_string *str = &f->str; + + do { + + /* determine the amount of data in the buffer */ + dataSize = (int32_t)(str->fLimit - str->fPos); + if (dataSize <= 0) { + /* fill the buffer */ + ufile_fill_uchar_buffer(f); + dataSize = (int32_t)(str->fLimit - str->fPos); + } + + /* Make sure that we don't read too much */ + if (dataSize > (count - read)) { + dataSize = count - read; + } + + /* copy the current data in the buffer */ + memcpy(chars + read, str->fPos, dataSize * sizeof(UChar)); + + /* update number of items read */ + read += dataSize; + + /* update the current buffer position */ + str->fPos += dataSize; + } + while (dataSize != 0 && read < count); + + return read; +} +#endif |