about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/libxml/encoding.c
diff options
context:
space:
mode:
author: setser <setser@yandex-team.ru> 2022-05-09 00:13:37 +0300
committer: setser <setser@yandex-team.ru> 2022-05-09 00:13:37 +0300
commit: e87e3fc8d0e04eb7ba3eee221bb91613b527ad85 (patch)
tree: 5279c128bdbdf902b9a08d9fae8e55b91910a553 /contrib/libs/libxml/encoding.c
parent: f4f3e4024a1f32bd0bc3fa20239025a1b179e42d (diff)
download: ydb-e87e3fc8d0e04eb7ba3eee221bb91613b527ad85.tar.gz
Update libxml to 2.9.13
ref:f572491d236694e847142c36f0f5546c649e05d7
Diffstat (limited to 'contrib/libs/libxml/encoding.c')
-rw-r--r-- contrib/libs/libxml/encoding.c | 132
1 files changed, 95 insertions, 37 deletions
diff --git a/contrib/libs/libxml/encoding.c b/contrib/libs/libxml/encoding.c
index 89ac8dd2c9..945d2c4470 100644
--- a/contrib/libs/libxml/encoding.c
+++ b/contrib/libs/libxml/encoding.c
@@ -80,7 +80,7 @@ static int xmlLittleEndian = 1;
/**
* xmlEncodingErrMemory:
- * @extra: extra informations
+ * @extra: extra information
*
* Handle an out of memory condition
*/
@@ -174,7 +174,7 @@ closeIcuConverter(uconv_t *conv)
* Returns 0 if success, or -1 otherwise
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
static int
asciiToUTF8(unsigned char* out, int *outlen,
@@ -221,7 +221,7 @@ asciiToUTF8(unsigned char* out, int *outlen,
* Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
static int
UTF8Toascii(unsigned char* out, int *outlen,
@@ -305,7 +305,7 @@ UTF8Toascii(unsigned char* out, int *outlen,
* Returns the number of bytes written if success, or -1 otherwise
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
int
isolat1ToUTF8(unsigned char* out, int *outlen,
@@ -377,6 +377,11 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
if (len < 0)
return(-1);
+ /*
+ * FIXME: Conversion functions must ensure valid UTF-8, so we have
+ * to check for UTF-8 validity. Preferably, this converter shouldn't
+ * be used at all.
+ */
memcpy(out, inb, len);
*outlen = len;
@@ -400,7 +405,7 @@ UTF8ToUTF8(unsigned char* out, int *outlen,
or -1 otherwise
* The value of @inlen after return is the number of octets consumed
* if the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
int
UTF8Toisolat1(unsigned char* out, int *outlen,
@@ -500,13 +505,18 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
{
unsigned char* outstart = out;
const unsigned char* processed = inb;
- unsigned char* outend = out + *outlen;
+ unsigned char* outend;
unsigned short* in = (unsigned short*) inb;
unsigned short* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits;
+ if (*outlen == 0) {
+ *inlenb = 0;
+ return(0);
+ }
+ outend = out + *outlen;
if ((*inlenb % 2) == 1)
(*inlenb)--;
inlen = *inlenb / 2;
@@ -521,7 +531,7 @@ UTF16LEToUTF8(unsigned char* out, int *outlen,
in++;
}
if ((c & 0xFC00) == 0xD800) { /* surrogates */
- if (in >= inend) { /* (in > inend) shouldn't happens */
+ if (in >= inend) { /* handle split multi-byte characters */
break;
}
if (xmlLittleEndian) {
@@ -738,38 +748,39 @@ UTF16BEToUTF8(unsigned char* out, int *outlen,
{
unsigned char* outstart = out;
const unsigned char* processed = inb;
- unsigned char* outend = out + *outlen;
+ unsigned char* outend;
unsigned short* in = (unsigned short*) inb;
unsigned short* inend;
unsigned int c, d, inlen;
unsigned char *tmp;
int bits;
+ if (*outlen == 0) {
+ *inlenb = 0;
+ return(0);
+ }
+ outend = out + *outlen;
if ((*inlenb % 2) == 1)
(*inlenb)--;
inlen = *inlenb / 2;
inend= in + inlen;
- while (in < inend) {
+ while ((in < inend) && (out - outstart + 5 < *outlen)) {
if (xmlLittleEndian) {
tmp = (unsigned char *) in;
c = *tmp++;
- c = c << 8;
- c = c | (unsigned int) *tmp;
+ c = (c << 8) | (unsigned int) *tmp;
in++;
} else {
c= *in++;
}
if ((c & 0xFC00) == 0xD800) { /* surrogates */
- if (in >= inend) { /* (in > inend) shouldn't happens */
- *outlen = out - outstart;
- *inlenb = processed - inb;
- return(-2);
+ if (in >= inend) { /* handle split multi-byte characters */
+ break;
}
if (xmlLittleEndian) {
tmp = (unsigned char *) in;
d = *tmp++;
- d = d << 8;
- d = d | (unsigned int) *tmp;
+ d = (d << 8) | (unsigned int) *tmp;
in++;
} else {
d= *in++;
@@ -1493,16 +1504,25 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
if ((handler == NULL) || (handlers == NULL)) {
xmlEncodingErr(XML_I18N_NO_HANDLER,
"xmlRegisterCharEncodingHandler: NULL handler !\n", NULL);
- return;
+ goto free_handler;
}
if (nbCharEncodingHandler >= MAX_ENCODING_HANDLERS) {
xmlEncodingErr(XML_I18N_EXCESS_HANDLER,
"xmlRegisterCharEncodingHandler: Too many handler registered, see %s\n",
"MAX_ENCODING_HANDLERS");
- return;
+ goto free_handler;
}
handlers[nbCharEncodingHandler++] = handler;
+ return;
+
+free_handler:
+ if (handler != NULL) {
+ if (handler->name != NULL) {
+ xmlFree(handler->name);
+ }
+ xmlFree(handler);
+ }
}
/**
@@ -1794,7 +1814,7 @@ xmlFindCharEncodingHandler(const char *name) {
* @cd: iconv converter data structure
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
- * @in: a pointer to an array of ISO Latin 1 chars
+ * @in: a pointer to an array of input bytes
* @inlen: the length of @in
*
* Returns 0 if success, or
@@ -1805,7 +1825,7 @@ xmlFindCharEncodingHandler(const char *name) {
*
* The value of @inlen after return is the number of octets consumed
* as the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
static int
xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
@@ -1813,7 +1833,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
size_t icv_inlen, icv_outlen;
const char *icv_in = (const char *) in;
char *icv_out = (char *) out;
- int ret;
+ size_t ret;
if ((out == NULL) || (outlen == NULL) || (inlen == NULL) || (in == NULL)) {
if (outlen != NULL) *outlen = 0;
@@ -1824,7 +1844,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
ret = iconv(cd, (ICONV_CONST char **) &icv_in, &icv_inlen, &icv_out, &icv_outlen);
*inlen -= icv_inlen;
*outlen -= icv_outlen;
- if ((icv_inlen != 0) || (ret == -1)) {
+ if ((icv_inlen != 0) || (ret == (size_t) -1)) {
#ifdef EILSEQ
if (errno == EILSEQ) {
return -2;
@@ -1861,7 +1881,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
* @toUnicode : non-zero if toUnicode. 0 otherwise.
* @out: a pointer to an array of bytes to store the result
* @outlen: the length of @out
- * @in: a pointer to an array of ISO Latin 1 chars
+ * @in: a pointer to an array of input bytes
* @inlen: the length of @in
* @flush: if true, indicates end of input
*
@@ -1873,7 +1893,7 @@ xmlIconvWrapper(iconv_t cd, unsigned char *out, int *outlen,
*
* The value of @inlen after return is the number of octets consumed
* as the return value is positive, else unpredictable.
- * The value of @outlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
*/
static int
xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
@@ -1922,6 +1942,25 @@ xmlUconvWrapper(uconv_t *cd, int toUnicode, unsigned char *out, int *outlen,
* *
************************************************************************/
+/**
+ * xmlEncInputChunk:
+ * @handler: encoding handler
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of input bytes
+ * @inlen: the length of @in
+ * @flush: flush (ICU-related)
+ *
+ * Returns 0 on success, or
+ * -1 for lack of space, or
+ * -2 if the transcoding fails (because *in is not a valid utf8 string or
+ * the result of the transformation can't fit into the encoding we want), or
+ * -3 if the last byte can't form a single output char.
+ *
+ * The value of @inlen after return is the number of octets consumed
+ * if the return value is 0, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
+ */
static int
xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
int *outlen, const unsigned char *in, int *inlen, int flush) {
@@ -1930,6 +1969,8 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
if (handler->input != NULL) {
ret = handler->input(out, outlen, in, inlen);
+ if (ret > 0)
+ ret = 0;
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_in != NULL) {
@@ -1951,7 +1992,25 @@ xmlEncInputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
return(ret);
}
-/* Returns -4 if no output function was found. */
+/**
+ * xmlEncOutputChunk:
+ * @handler: encoding handler
+ * @out: a pointer to an array of bytes to store the result
+ * @outlen: the length of @out
+ * @in: a pointer to an array of input bytes
+ * @inlen: the length of @in
+ *
+ * Returns 0 on success, or
+ * -1 for lack of space, or
+ * -2 if the transcoding fails (because *in is not a valid utf8 string or
+ * the result of the transformation can't fit into the encoding we want), or
+ * -3 if the last byte can't form a single output char, or
+ * -4 if no output function was found.
+ *
+ * The value of @inlen after return is the number of octets consumed
+ * if the return value is 0, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
+ */
static int
xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
int *outlen, const unsigned char *in, int *inlen) {
@@ -1959,6 +2018,8 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
if (handler->output != NULL) {
ret = handler->output(out, outlen, in, inlen);
+ if (ret > 0)
+ ret = 0;
}
#ifdef LIBXML_ICONV_ENABLED
else if (handler->iconv_out != NULL) {
@@ -1968,7 +2029,7 @@ xmlEncOutputChunk(xmlCharEncodingHandler *handler, unsigned char *out,
#ifdef LIBXML_ICU_ENABLED
else if (handler->uconv_out != NULL) {
ret = xmlUconvWrapper(handler->uconv_out, 0, out, outlen, in, inlen,
- TRUE);
+ 1);
}
#endif /* LIBXML_ICU_ENABLED */
else {
@@ -2064,7 +2125,7 @@ xmlCharEncFirstLineInt(xmlCharEncodingHandler *handler, xmlBufferPtr out,
*/
if (ret == -3) ret = 0;
if (ret == -1) ret = 0;
- return(ret);
+ return(written ? written : ret);
}
/**
@@ -2194,7 +2255,7 @@ xmlCharEncFirstLineInput(xmlParserInputBufferPtr input, int len)
*/
if (ret == -3) ret = 0;
if (ret == -1) ret = 0;
- return(ret);
+ return(c_out ? c_out : ret);
}
/**
@@ -2404,7 +2465,7 @@ xmlCharEncOutput(xmlOutputBufferPtr output, int init)
{
int ret;
size_t written;
- size_t writtentot = 0;
+ int writtentot = 0;
size_t toconv;
int c_in;
int c_out;
@@ -2437,7 +2498,7 @@ retry:
xmlGenericError(xmlGenericErrorContext,
"initialized encoder\n");
#endif
- return(0);
+ return(c_out);
}
/*
@@ -2445,7 +2506,7 @@ retry:
*/
toconv = xmlBufUse(in);
if (toconv == 0)
- return (0);
+ return (writtentot);
if (toconv > 64 * 1024)
toconv = 64 * 1024;
if (toconv * 4 >= written) {
@@ -2550,7 +2611,7 @@ retry:
goto retry;
}
}
- return(ret);
+ return(writtentot ? writtentot : ret);
}
#endif
@@ -2579,7 +2640,6 @@ xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
int written;
int writtentot = 0;
int toconv;
- int output = 0;
if (handler == NULL) return(-1);
if (out == NULL) return(-1);
@@ -2632,8 +2692,6 @@ retry:
ret = -3;
}
- if (ret >= 0) output += ret;
-
/*
* Attempt to handle error cases
*/
@@ -2715,7 +2773,7 @@ retry:
goto retry;
}
}
- return(ret);
+ return(writtentot ? writtentot : ret);
}
/**