diff options
author | setser <setser@yandex-team.ru> | 2022-05-09 00:13:37 +0300 |
---|---|---|
committer | setser <setser@yandex-team.ru> | 2022-05-09 00:13:37 +0300 |
commit | e87e3fc8d0e04eb7ba3eee221bb91613b527ad85 (patch) | |
tree | 5279c128bdbdf902b9a08d9fae8e55b91910a553 /contrib/libs/libxml/encoding.c | |
parent | f4f3e4024a1f32bd0bc3fa20239025a1b179e42d (diff) | |
download | ydb-e87e3fc8d0e04eb7ba3eee221bb91613b527ad85.tar.gz |
Update libxml to 2.9.13
ref:f572491d236694e847142c36f0f5546c649e05d7
Diffstat (limited to 'contrib/libs/libxml/encoding.c')
-rw-r--r-- | contrib/libs/libxml/encoding.c | 132 |
1 files changed, 95 insertions, 37 deletions
diff --git a/contrib/libs/libxml/encoding.c b/contrib/libs/libxml/encoding.c index 89ac8dd2c9..945d2c4470 100644 --- a/contrib/libs/libxml/encoding.c +++ b/contrib/libs/libxml/encoding.c @@ -80,7 +80,7 @@ static int xmlLittleEndian = 1; /** * xmlEncodingErrMemory: - * @extra: extra informations + * @extra: extra information * * Handle an out of memory condition */ @@ -174,7 +174,7 @@ closeIcuConverter(uconv_t *conv) * Returns 0 if success, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int asciiToUTF8(unsigned char* out, int *outlen, @@ -221,7 +221,7 @@ asciiToUTF8(unsigned char* out, int *outlen, * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int UTF8Toascii(unsigned char* out, int *outlen, @@ -305,7 +305,7 @@ UTF8Toascii(unsigned char* out, int *outlen, * Returns the number of bytes written if success, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ int isolat1ToUTF8(unsigned char* out, int *outlen, @@ -377,6 +377,11 @@ UTF8ToUTF8(unsigned char* out, int *outlen, if (len < 0) return(-1); + /* + * FIXME: Conversion functions must assure valid UTF-8, so we have + * to check for UTF-8 validity. Preferably, this converter shouldn't + * be used at all. + */ memcpy(out, inb, len); *outlen = len; @@ -400,7 +405,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen, or -1 otherwise * The value of @inlen after return is the number of octets consumed * if the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ int UTF8Toisolat1(unsigned char* out, int *outlen, @@ -500,13 +505,18 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, { unsigned char* outstart = out; const unsigned char* processed = inb; - unsigned char* outend = out + *outlen; + unsigned char* outend; unsigned short* in = (unsigned short*) inb; unsigned short* inend; unsigned int c, d, inlen; unsigned char *tmp; int bits; + if (*outlen == 0) { + *inlenb = 0; + return(0); + } + outend = out + *outlen; if ((*inlenb % 2) == 1) (*inlenb)--; inlen = *inlenb / 2; @@ -521,7 +531,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen, in++; } if ((c & 0xFC00) == 0xD800) { /* surrogates */ - if (in >= inend) { /* (in > inend) shouldn't happens */ + if (in >= inend) { /* handle split mutli-byte characters */ break; } if (xmlLittleEndian) { @@ -738,38 +748,39 @@ UTF16BEToUTF8(unsigned char* out, int *outlen, { unsigned char* outstart = out; const unsigned char* processed = inb; - unsigned char* outend = out + *outlen; + unsigned char* outend; unsigned short* in = (unsigned short*) inb; unsigned short* inend; unsigned int c, d, inlen; unsigned char *tmp; int bits; + if (*outlen == 0) { + *inlenb = 0; + return(0); + } + outend = out + *outlen; if ((*inlenb % 2) == 1) (*inlenb)--; inlen = *inlenb / 2; inend= in + inlen; - while (in < inend) { + while ((in < inend) && (out - outstart + 5 < *outlen)) { if (xmlLittleEndian) { tmp = (unsigned char *) in; c = *tmp++; - c = c << 8; - c = c | (unsigned int) *tmp; + c = (c << 8) | (unsigned int) *tmp; in++; } else { c= *in++; } if ((c & 0xFC00) == 0xD800) { /* surrogates */ - if (in >= inend) { /* (in > inend) shouldn't happens */ - *outlen = out - outstart; - *inlenb = processed - inb; - return(-2); + if (in >= inend) { /* handle split mutli-byte characters */ + break; } if (xmlLittleEndian) { tmp = (unsigned char *) in; d = *tmp++; - d = d << 8; - d = d | (unsigned int) *tmp; + d = (d << 8) | (unsigned int) *tmp; in++; } else { d= *in++; @@ -1493,16 +1504,25 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) { if ((handler == NULL) || (handlers == NULL)) { xmlEncodingErr(XML_I18N_NO_HANDLER, "xmlRegisterCharEncodingHandler: NULL handler !\n", NULL); - return; + goto free_handler; } if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) { xmlEncodingErr(XML_I18N_EXCESS_HANDLER, "xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n", "MAX_ENCODING_HANDLERS"); - return; + goto free_handler; } handlers[nbCharEncodingHandler++] = handler; + return; + +free_handler: + if (handler != NULL) { + if (handler->name != NULL) { + xmlFree(handler->name); + } + xmlFree(handler); + } } /** @@ -1794,7 +1814,7 @@ xmlFindCharEncodingHandler(const char *name) { * @cd: iconv converter data structure * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out - * @in: a pointer to an array of ISO Latin 1 chars + * @in: a pointer to an array of input bytes * @inlen: the length of @in * * Returns 0 if success, or @@ -1805,7 +1825,7 @@ xmlFindCharEncodingHandler(const char *name) { * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, @@ -1813,7 +1833,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, size_t icv_inlen, icv_outlen; const char *icv_in = (const char *) in; char *icv_out = (char *) out; - int ret; + size_t ret; if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) { if (outlen != NULL) *outlen = 0; @@ -1824,7 +1844,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen); *inlen -= icv_inlen; *outlen -= icv_outlen; - if ((icv_inlen != 0) || (ret == -1)) { + if ((icv_inlen != 0) || (ret == (size_t) -1)) { #ifdef EILSEQ if (errno == EILSEQ) { return -2; @@ -1861,7 +1881,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * @toUnicode : non-zero if toUnicode. 0 otherwise. * @out: a pointer to an array of bytes to store the result * @outlen: the length of @out - * @in: a pointer to an array of ISO Latin 1 chars + * @in: a pointer to an array of input bytes * @inlen: the length of @in * @flush: if true, indicates end of input * @@ -1873,7 +1893,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen, * * The value of @inlen after return is the number of octets consumed * as the return value is positive, else unpredictable. - * The value of @outlen after return is the number of octets consumed. + * The value of @outlen after return is the number of octets produced. */ static int xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, @@ -1922,6 +1942,25 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen, * * ************************************************************************/ +/** + * xmlEncInputChunk: + * @handler: encoding handler + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of input bytes + * @inlen: the length of @in + * @flush: flush (ICU-related) + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is 0, else unpredictable. + * The value of @outlen after return is the number of octets produced. + */ static int xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen, int flush) { @@ -1930,6 +1969,8 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->input != NULL) { ret = handler->input(out, outlen, in, inlen); + if (ret > 0) + ret = 0; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_in != NULL) { @@ -1951,7 +1992,25 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out, return(ret); } -/* Returns -4 if no output function was found. */ +/** + * xmlEncOutputChunk: + * @handler: encoding handler + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of input bytes + * @inlen: the length of @in + * + * Returns 0 if success, or + * -1 by lack of space, or + * -2 if the transcoding fails (for *in is not valid utf8 string or + * the result of transformation can't fit into the encoding we want), or + * -3 if there the last byte can't form a single output char. + * -4 if no output function was found. + * + * The value of @inlen after return is the number of octets consumed + * as the return value is 0, else unpredictable. + * The value of @outlen after return is the number of octets produced. + */ static int xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, int *outlen, const unsigned char *in, int *inlen) { @@ -1959,6 +2018,8 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, if (handler->output != NULL) { ret = handler->output(out, outlen, in, inlen); + if (ret > 0) + ret = 0; } #ifdef LIBXML_ICONV_ENABLED else if (handler->iconv_out != NULL) { @@ -1968,7 +2029,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out, #ifdef LIBXML_ICU_ENABLED else if (handler->uconv_out != NULL) { ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen, - TRUE); + 1); } #endif /* LIBXML_ICU_ENABLED */ else { @@ -2064,7 +2125,7 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out, */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; - return(ret); + return(written ? written : ret); } /** @@ -2194,7 +2255,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len) */ if (ret == -3) ret = 0; if (ret == -1) ret = 0; - return(ret); + return(c_out ? c_out : ret); } /** @@ -2404,7 +2465,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init) { int ret; size_t written; - size_t writtentot = 0; + int writtentot = 0; size_t toconv; int c_in; int c_out; @@ -2437,7 +2498,7 @@ retry: xmlGenericError(xmlGenericErrorContext, "initialized encoder\n"); #endif - return(0); + return(c_out); } /* @@ -2445,7 +2506,7 @@ retry: */ toconv = xmlBufUse(in); if (toconv == 0) - return (0); + return (writtentot); if (toconv > 64 * 1024) toconv = 64 * 1024; if (toconv * 4 >= written) { @@ -2550,7 +2611,7 @@ retry: goto retry; } } - return(ret); + return(writtentot ? writtentot : ret); } #endif @@ -2579,7 +2640,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out, int written; int writtentot = 0; int toconv; - int output = 0; if (handler == NULL) return(-1); if (out == NULL) return(-1); @@ -2632,8 +2692,6 @@ retry: ret = -3; } - if (ret >= 0) output += ret; - /* * Attempt to handle error cases */ @@ -2715,7 +2773,7 @@ retry: goto retry; } } - return(ret); + return(writtentot ? writtentot : ret); } /** |