summaryrefslogtreecommitdiffstats
path: root/contrib/libs/icu/common/ucnvbocu.cpp
diff options
context:
space:
mode:
authorneksard <[email protected]>2022-02-10 16:45:33 +0300
committerDaniil Cherednik <[email protected]>2022-02-10 16:45:33 +0300
commit1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
treeb2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/ucnvbocu.cpp
parent8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/ucnvbocu.cpp')
-rw-r--r--contrib/libs/icu/common/ucnvbocu.cpp2818
1 files changed, 1409 insertions, 1409 deletions
diff --git a/contrib/libs/icu/common/ucnvbocu.cpp b/contrib/libs/icu/common/ucnvbocu.cpp
index ee115e0ebe7..7c2aab56558 100644
--- a/contrib/libs/icu/common/ucnvbocu.cpp
+++ b/contrib/libs/icu/common/ucnvbocu.cpp
@@ -1,1413 +1,1413 @@
// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-******************************************************************************
-*
-* Copyright (C) 2002-2016, International Business Machines
-* Corporation and others. All Rights Reserved.
-*
-******************************************************************************
-* file name: ucnvbocu.cpp
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ucnvbocu.cpp
* encoding: UTF-8
-* tab size: 8 (not used)
-* indentation:4
-*
-* created on: 2002mar27
-* created by: Markus W. Scherer
-*
-* This is an implementation of the Binary Ordered Compression for Unicode,
-* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
-*/
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
-
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_cb.h"
-#include "unicode/utf16.h"
-#include "putilimp.h"
-#include "ucnv_bld.h"
-#include "ucnv_cnv.h"
-#include "uassert.h"
-
-/* BOCU-1 constants and macros ---------------------------------------------- */
-
-/*
- * BOCU-1 encodes the code points of a Unicode string as
- * a sequence of byte-encoded differences (slope detection),
- * preserving lexical order.
- *
- * Optimize the difference-taking for runs of Unicode text within
- * small scripts:
- *
- * Most small scripts are allocated within aligned 128-blocks of Unicode
- * code points. Lexical order is preserved if the "previous code point" state
- * is always moved into the middle of such a block.
- *
- * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
- * areas into the middle of those areas.
- *
- * C0 control codes and space are encoded with their US-ASCII bytes.
- * "prev" is reset for C0 controls but not for space.
- */
-
-/* initial value for "prev": middle of the ASCII range */
-#define BOCU1_ASCII_PREV 0x40
-
-/* bounding byte values for differences */
-#define BOCU1_MIN 0x21
-#define BOCU1_MIDDLE 0x90
-#define BOCU1_MAX_LEAD 0xfe
-#define BOCU1_MAX_TRAIL 0xff
-#define BOCU1_RESET 0xff
-
-/* number of lead bytes */
-#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1)
-
-/* adjust trail byte counts for the use of some C0 control byte values */
-#define BOCU1_TRAIL_CONTROLS_COUNT 20
-#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
-
-/* number of trail bytes */
-#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
-
-/*
- * number of positive and negative single-byte codes
- * (counting 0==BOCU1_MIDDLE among the positive ones)
- */
-#define BOCU1_SINGLE 64
-
-/* number of lead bytes for positive and negative 2/3/4-byte sequences */
-#define BOCU1_LEAD_2 43
-#define BOCU1_LEAD_3 3
-#define BOCU1_LEAD_4 1
-
-/* The difference value range for single-byters. */
-#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1)
-#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE)
-
-/* The difference value range for double-byters. */
-#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
-#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
-
-/* The difference value range for 3-byters. */
-#define BOCU1_REACH_POS_3 \
- (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
-
-#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
-
-/* The lead byte start values. */
-#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
-#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2)
-#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3)
- /* ==BOCU1_MAX_LEAD */
-
-#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
-#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2)
-#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3)
- /* ==BOCU1_MIN+1 */
-
-/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
-#define BOCU1_LENGTH_FROM_LEAD(lead) \
- ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
- (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
- (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
-
-/* The length of a byte sequence, according to its packed form. */
-#define BOCU1_LENGTH_FROM_PACKED(packed) \
- ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
-
-/*
- * 12 commonly used C0 control codes (and space) are only used to encode
- * themselves directly,
- * which makes BOCU-1 MIME-usable and reasonably safe for
- * ASCII-oriented software.
- *
- * These controls are
- * 0 NUL
- *
- * 7 BEL
- * 8 BS
- *
- * 9 TAB
- * a LF
- * b VT
- * c FF
- * d CR
- *
- * e SO
- * f SI
- *
- * 1a SUB
- * 1b ESC
- *
- * The other 20 C0 controls are also encoded directly (to preserve order)
- * but are also used as trail bytes in difference encoding
- * (for better compression).
- */
-#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
-
-/*
- * Byte value map for control codes,
- * from external byte values 0x00..0x20
- * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
- * External byte values that are illegal as trail bytes are mapped to -1.
- */
-static const int8_t
-bocu1ByteToTrail[BOCU1_MIN]={
-/* 0 1 2 3 4 5 6 7 */
- -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
-
-/* 8 9 a b c d e f */
- -1, -1, -1, -1, -1, -1, -1, -1,
-
-/* 10 11 12 13 14 15 16 17 */
- 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
-
-/* 18 19 1a 1b 1c 1d 1e 1f */
- 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
-
-/* 20 */
- -1
-};
-
-/*
- * Byte value map for control codes,
- * from trail byte values 0..19 (0..0x13) as used in the difference calculation
- * to external byte values 0x00..0x20.
- */
-static const int8_t
-bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
-/* 0 1 2 3 4 5 6 7 */
- 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
-
-/* 8 9 a b c d e f */
- 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
-
-/* 10 11 12 13 */
- 0x1c, 0x1d, 0x1e, 0x1f
-};
-
-/**
- * Integer division and modulo with negative numerators
- * yields negative modulo results and quotients that are one more than
- * what we need here.
- * This macro adjust the results so that the modulo-value m is always >=0.
- *
- * For positive n, the if() condition is always FALSE.
- *
- * @param n Number to be split into quotient and rest.
- * Will be modified to contain the quotient.
- * @param d Divisor.
- * @param m Output variable for the rest (modulo result).
- */
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar27
+* created by: Markus W. Scherer
+*
+* This is an implementation of the Binary Ordered Compression for Unicode,
+* in its MIME-friendly form as defined in http://www.unicode.org/notes/tn6/
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_cb.h"
+#include "unicode/utf16.h"
+#include "putilimp.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "uassert.h"
+
+/* BOCU-1 constants and macros ---------------------------------------------- */
+
+/*
+ * BOCU-1 encodes the code points of a Unicode string as
+ * a sequence of byte-encoded differences (slope detection),
+ * preserving lexical order.
+ *
+ * Optimize the difference-taking for runs of Unicode text within
+ * small scripts:
+ *
+ * Most small scripts are allocated within aligned 128-blocks of Unicode
+ * code points. Lexical order is preserved if the "previous code point" state
+ * is always moved into the middle of such a block.
+ *
+ * Additionally, "prev" is moved from anywhere in the Unihan and Hangul
+ * areas into the middle of those areas.
+ *
+ * C0 control codes and space are encoded with their US-ASCII bytes.
+ * "prev" is reset for C0 controls but not for space.
+ */
+
+/* initial value for "prev": middle of the ASCII range */
+#define BOCU1_ASCII_PREV 0x40
+
+/* bounding byte values for differences */
+#define BOCU1_MIN 0x21
+#define BOCU1_MIDDLE 0x90
+#define BOCU1_MAX_LEAD 0xfe
+#define BOCU1_MAX_TRAIL 0xff
+#define BOCU1_RESET 0xff
+
+/* number of lead bytes */
+#define BOCU1_COUNT (BOCU1_MAX_LEAD-BOCU1_MIN+1)
+
+/* adjust trail byte counts for the use of some C0 control byte values */
+#define BOCU1_TRAIL_CONTROLS_COUNT 20
+#define BOCU1_TRAIL_BYTE_OFFSET (BOCU1_MIN-BOCU1_TRAIL_CONTROLS_COUNT)
+
+/* number of trail bytes */
+#define BOCU1_TRAIL_COUNT ((BOCU1_MAX_TRAIL-BOCU1_MIN+1)+BOCU1_TRAIL_CONTROLS_COUNT)
+
+/*
+ * number of positive and negative single-byte codes
+ * (counting 0==BOCU1_MIDDLE among the positive ones)
+ */
+#define BOCU1_SINGLE 64
+
+/* number of lead bytes for positive and negative 2/3/4-byte sequences */
+#define BOCU1_LEAD_2 43
+#define BOCU1_LEAD_3 3
+#define BOCU1_LEAD_4 1
+
+/* The difference value range for single-byters. */
+#define BOCU1_REACH_POS_1 (BOCU1_SINGLE-1)
+#define BOCU1_REACH_NEG_1 (-BOCU1_SINGLE)
+
+/* The difference value range for double-byters. */
+#define BOCU1_REACH_POS_2 (BOCU1_REACH_POS_1+BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+#define BOCU1_REACH_NEG_2 (BOCU1_REACH_NEG_1-BOCU1_LEAD_2*BOCU1_TRAIL_COUNT)
+
+/* The difference value range for 3-byters. */
+#define BOCU1_REACH_POS_3 \
+ (BOCU1_REACH_POS_2+BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+#define BOCU1_REACH_NEG_3 (BOCU1_REACH_NEG_2-BOCU1_LEAD_3*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT)
+
+/* The lead byte start values. */
+#define BOCU1_START_POS_2 (BOCU1_MIDDLE+BOCU1_REACH_POS_1+1)
+#define BOCU1_START_POS_3 (BOCU1_START_POS_2+BOCU1_LEAD_2)
+#define BOCU1_START_POS_4 (BOCU1_START_POS_3+BOCU1_LEAD_3)
+ /* ==BOCU1_MAX_LEAD */
+
+#define BOCU1_START_NEG_2 (BOCU1_MIDDLE+BOCU1_REACH_NEG_1)
+#define BOCU1_START_NEG_3 (BOCU1_START_NEG_2-BOCU1_LEAD_2)
+#define BOCU1_START_NEG_4 (BOCU1_START_NEG_3-BOCU1_LEAD_3)
+ /* ==BOCU1_MIN+1 */
+
+/* The length of a byte sequence, according to the lead byte (!=BOCU1_RESET). */
+#define BOCU1_LENGTH_FROM_LEAD(lead) \
+ ((BOCU1_START_NEG_2<=(lead) && (lead)<BOCU1_START_POS_2) ? 1 : \
+ (BOCU1_START_NEG_3<=(lead) && (lead)<BOCU1_START_POS_3) ? 2 : \
+ (BOCU1_START_NEG_4<=(lead) && (lead)<BOCU1_START_POS_4) ? 3 : 4)
+
+/* The length of a byte sequence, according to its packed form. */
+#define BOCU1_LENGTH_FROM_PACKED(packed) \
+ ((uint32_t)(packed)<0x04000000 ? (packed)>>24 : 4)
+
+/*
+ * 12 commonly used C0 control codes (and space) are only used to encode
+ * themselves directly,
+ * which makes BOCU-1 MIME-usable and reasonably safe for
+ * ASCII-oriented software.
+ *
+ * These controls are
+ * 0 NUL
+ *
+ * 7 BEL
+ * 8 BS
+ *
+ * 9 TAB
+ * a LF
+ * b VT
+ * c FF
+ * d CR
+ *
+ * e SO
+ * f SI
+ *
+ * 1a SUB
+ * 1b ESC
+ *
+ * The other 20 C0 controls are also encoded directly (to preserve order)
+ * but are also used as trail bytes in difference encoding
+ * (for better compression).
+ */
+#define BOCU1_TRAIL_TO_BYTE(t) ((t)>=BOCU1_TRAIL_CONTROLS_COUNT ? (t)+BOCU1_TRAIL_BYTE_OFFSET : bocu1TrailToByte[t])
+
+/*
+ * Byte value map for control codes,
+ * from external byte values 0x00..0x20
+ * to trail byte values 0..19 (0..0x13) as used in the difference calculation.
+ * External byte values that are illegal as trail bytes are mapped to -1.
+ */
+static const int8_t
+bocu1ByteToTrail[BOCU1_MIN]={
+/* 0 1 2 3 4 5 6 7 */
+ -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, -1,
+
+/* 8 9 a b c d e f */
+ -1, -1, -1, -1, -1, -1, -1, -1,
+
+/* 10 11 12 13 14 15 16 17 */
+ 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+
+/* 18 19 1a 1b 1c 1d 1e 1f */
+ 0x0e, 0x0f, -1, -1, 0x10, 0x11, 0x12, 0x13,
+
+/* 20 */
+ -1
+};
+
+/*
+ * Byte value map for control codes,
+ * from trail byte values 0..19 (0..0x13) as used in the difference calculation
+ * to external byte values 0x00..0x20.
+ */
+static const int8_t
+bocu1TrailToByte[BOCU1_TRAIL_CONTROLS_COUNT]={
+/* 0 1 2 3 4 5 6 7 */
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x10, 0x11,
+
+/* 8 9 a b c d e f */
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+/* 10 11 12 13 */
+ 0x1c, 0x1d, 0x1e, 0x1f
+};
+
+/**
+ * Integer division and modulo with negative numerators
+ * yields negative modulo results and quotients that are one more than
+ * what we need here.
+ * This macro adjust the results so that the modulo-value m is always >=0.
+ *
+ * For positive n, the if() condition is always FALSE.
+ *
+ * @param n Number to be split into quotient and rest.
+ * Will be modified to contain the quotient.
+ * @param d Divisor.
+ * @param m Output variable for the rest (modulo result).
+ */
#define NEGDIVMOD(n, d, m) UPRV_BLOCK_MACRO_BEGIN { \
- (m)=(n)%(d); \
- (n)/=(d); \
- if((m)<0) { \
- --(n); \
- (m)+=(d); \
- } \
+ (m)=(n)%(d); \
+ (n)/=(d); \
+ if((m)<0) { \
+ --(n); \
+ (m)+=(d); \
+ } \
} UPRV_BLOCK_MACRO_END
-
-/* Faster versions of packDiff() for single-byte-encoded diff values. */
-
-/** Is a diff value encodable in a single byte? */
-#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
-
-/** Encode a diff value in a single byte. */
-#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
-
-/** Is a diff value encodable in two bytes? */
-#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
-
-/* BOCU-1 implementation functions ------------------------------------------ */
-
-#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
-
-/**
- * Compute the next "previous" value for differencing
- * from the current code point.
- *
- * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
- * @return "previous code point" state value
- */
-static inline int32_t
-bocu1Prev(int32_t c) {
- /* compute new prev */
- if(/* 0x3040<=c && */ c<=0x309f) {
- /* Hiragana is not 128-aligned */
- return 0x3070;
- } else if(0x4e00<=c && c<=0x9fa5) {
- /* CJK Unihan */
- return 0x4e00-BOCU1_REACH_NEG_2;
- } else if(0xac00<=c /* && c<=0xd7a3 */) {
- /* Korean Hangul */
- return (0xd7a3+0xac00)/2;
- } else {
- /* mostly small scripts */
- return BOCU1_SIMPLE_PREV(c);
- }
-}
-
-/** Fast version of bocu1Prev() for most scripts. */
-#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
-
-/*
- * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
- * The UConverter fields are used as follows:
- *
- * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
- *
- * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
- * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
- */
-
-/* BOCU-1-from-Unicode conversion functions --------------------------------- */
-
-/**
- * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
- * and return a packed integer with them.
- *
- * The encoding favors small absolute differences with short encodings
- * to compress runs of same-script characters.
- *
- * Optimized version with unrolled loops and fewer floating-point operations
- * than the standard packDiff().
- *
- * @param diff difference value -0x10ffff..0x10ffff
- * @return
- * 0x010000zz for 1-byte sequence zz
- * 0x0200yyzz for 2-byte sequence yy zz
- * 0x03xxyyzz for 3-byte sequence xx yy zz
- * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
- */
-static int32_t
-packDiff(int32_t diff) {
- int32_t result, m;
-
- U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
- if(diff>=BOCU1_REACH_NEG_1) {
- /* mostly positive differences, and single-byte negative ones */
-#if 0 /* single-byte case handled in macros, see below */
- if(diff<=BOCU1_REACH_POS_1) {
- /* single byte */
- return 0x01000000|(BOCU1_MIDDLE+diff);
- } else
-#endif
- if(diff<=BOCU1_REACH_POS_2) {
- /* two bytes */
- diff-=BOCU1_REACH_POS_1+1;
- result=0x02000000;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- result|=(BOCU1_START_POS_2+diff)<<8;
- } else if(diff<=BOCU1_REACH_POS_3) {
- /* three bytes */
- diff-=BOCU1_REACH_POS_2+1;
- result=0x03000000;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- result|=(BOCU1_START_POS_3+diff)<<16;
- } else {
- /* four bytes */
- diff-=BOCU1_REACH_POS_3+1;
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result=BOCU1_TRAIL_TO_BYTE(m);
-
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- /*
- * We know that / and % would deliver quotient 0 and rest=diff.
- * Avoid division and modulo for performance.
- */
- result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
-
- result|=((uint32_t)BOCU1_START_POS_4)<<24;
- }
- } else {
- /* two- to four-byte negative differences */
- if(diff>=BOCU1_REACH_NEG_2) {
- /* two bytes */
- diff-=BOCU1_REACH_NEG_1;
- result=0x02000000;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- result|=(BOCU1_START_NEG_2+diff)<<8;
- } else if(diff>=BOCU1_REACH_NEG_3) {
- /* three bytes */
- diff-=BOCU1_REACH_NEG_2;
- result=0x03000000;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m);
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- result|=(BOCU1_START_NEG_3+diff)<<16;
- } else {
- /* four bytes */
- diff-=BOCU1_REACH_NEG_3;
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result=BOCU1_TRAIL_TO_BYTE(m);
-
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
-
- /*
- * We know that NEGDIVMOD would deliver
- * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
- * Avoid division and modulo for performance.
- */
- m=diff+BOCU1_TRAIL_COUNT;
- result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
-
- result|=BOCU1_MIN<<24;
- }
- }
- return result;
-}
-
-
-static void U_CALLCONV
-_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
-
- int32_t prev, c, diff;
-
- int32_t sourceIndex, nextSourceIndex;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
- prev=(int32_t)cnv->fromUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion loop */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use only one loop counter variable, targetCapacity, not also source */
- diff=(int32_t)(sourceLimit-source);
- if(targetCapacity>diff) {
- targetCapacity=diff;
- }
- while(targetCapacity>0 && (c=*source)<0x3000) {
- if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- *offsets++=nextSourceIndex++;
- ++source;
- --targetCapacity;
- } else {
- diff=c-prev;
- if(DIFF_IS_SINGLE(diff)) {
- prev=BOCU1_SIMPLE_PREV(c);
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- *offsets++=nextSourceIndex++;
- ++source;
- --targetCapacity;
- } else {
- break;
- }
- }
- }
- /* restore real values */
- targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
- sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
-
- /* regular loop for all cases */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- c=*source++;
- ++nextSourceIndex;
-
- if(c<=0x20) {
- /*
- * ISO C0 control & space:
- * Encode directly for MIME compatibility,
- * and reset state except for space, to not disrupt compression.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- *offsets++=sourceIndex;
- --targetCapacity;
-
- sourceIndex=nextSourceIndex;
- continue;
- }
-
- if(U16_IS_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- }
- } else {
- /* no more input */
- c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
- break;
- }
- }
-
- /*
- * all other Unicode code points c==U+0021..U+10ffff
- * are encoded with the difference c-prev
- *
- * a new prev is computed from c,
- * placed in the middle of a 0x80-block (for most small scripts) or
- * in the middle of the Unihan and Hangul blocks
- * to statistically minimize the following difference
- */
- diff=c-prev;
- prev=BOCU1_PREV(c);
- if(DIFF_IS_SINGLE(diff)) {
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- *offsets++=sourceIndex;
- --targetCapacity;
- sourceIndex=nextSourceIndex;
- if(c<0x3000) {
- goto fastSingle;
- }
- } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
- /* optimize 2-byte case */
- int32_t m;
-
- if(diff>=0) {
- diff-=BOCU1_REACH_POS_1+1;
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- diff+=BOCU1_START_POS_2;
- } else {
- diff-=BOCU1_REACH_NEG_1;
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- diff+=BOCU1_START_NEG_2;
- }
- *target++=(uint8_t)diff;
- *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- targetCapacity-=2;
- sourceIndex=nextSourceIndex;
- } else {
- int32_t length; /* will be 2..4 */
-
- diff=packDiff(diff);
- length=BOCU1_LENGTH_FROM_PACKED(diff);
-
- /* write the output character bytes from diff and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(diff>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(diff>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- *offsets++=sourceIndex;
- /* case 1: handled above */
- *target++=(uint8_t)diff;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- sourceIndex=nextSourceIndex;
- } else {
- uint8_t *charErrorBuffer;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 1<=targetCapacity<length<=4 */
- length-=targetCapacity;
- charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 3:
- *charErrorBuffer++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *charErrorBuffer++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *charErrorBuffer=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- diff>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(diff>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)diff;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32= c<0 ? -c : 0;
- cnv->fromUnicodeStatus=(uint32_t)prev;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
-}
-
-/*
- * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
-static void U_CALLCONV
-_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const UChar *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
-
- int32_t prev, c, diff;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
-
- /* get the converter state from UConverter */
- c=cnv->fromUChar32;
- prev=(int32_t)cnv->fromUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
-
- /* conversion loop */
- if(c!=0 && targetCapacity>0) {
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use only one loop counter variable, targetCapacity, not also source */
- diff=(int32_t)(sourceLimit-source);
- if(targetCapacity>diff) {
- targetCapacity=diff;
- }
- while(targetCapacity>0 && (c=*source)<0x3000) {
- if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- } else {
- diff=c-prev;
- if(DIFF_IS_SINGLE(diff)) {
- prev=BOCU1_SIMPLE_PREV(c);
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- } else {
- break;
- }
- }
- ++source;
- --targetCapacity;
- }
- /* restore real values */
- targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
-
- /* regular loop for all cases */
- while(source<sourceLimit) {
- if(targetCapacity>0) {
- c=*source++;
-
- if(c<=0x20) {
- /*
- * ISO C0 control & space:
- * Encode directly for MIME compatibility,
- * and reset state except for space, to not disrupt compression.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(uint8_t)c;
- --targetCapacity;
- continue;
- }
-
- if(U16_IS_LEAD(c)) {
-getTrail:
- if(source<sourceLimit) {
- /* test the following code unit */
- UChar trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- }
- } else {
- /* no more input */
- c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
- break;
- }
- }
-
- /*
- * all other Unicode code points c==U+0021..U+10ffff
- * are encoded with the difference c-prev
- *
- * a new prev is computed from c,
- * placed in the middle of a 0x80-block (for most small scripts) or
- * in the middle of the Unihan and Hangul blocks
- * to statistically minimize the following difference
- */
- diff=c-prev;
- prev=BOCU1_PREV(c);
- if(DIFF_IS_SINGLE(diff)) {
- *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
- --targetCapacity;
- if(c<0x3000) {
- goto fastSingle;
- }
- } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
- /* optimize 2-byte case */
- int32_t m;
-
- if(diff>=0) {
- diff-=BOCU1_REACH_POS_1+1;
- m=diff%BOCU1_TRAIL_COUNT;
- diff/=BOCU1_TRAIL_COUNT;
- diff+=BOCU1_START_POS_2;
- } else {
- diff-=BOCU1_REACH_NEG_1;
- NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
- diff+=BOCU1_START_NEG_2;
- }
- *target++=(uint8_t)diff;
- *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
- targetCapacity-=2;
- } else {
- int32_t length; /* will be 2..4 */
-
- diff=packDiff(diff);
- length=BOCU1_LENGTH_FROM_PACKED(diff);
-
- /* write the output character bytes from diff and length */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(diff>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(diff>>16);
- /* case 2: handled above */
- *target++=(uint8_t)(diff>>8);
- /* case 1: handled above */
- *target++=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- } else {
- uint8_t *charErrorBuffer;
-
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 1<=targetCapacity<length<=4 */
- length-=targetCapacity;
- charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 3:
- *charErrorBuffer++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *charErrorBuffer++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *charErrorBuffer=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
-
- /* now output what fits into the regular target */
- diff>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(diff>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(diff>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)diff;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
-
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- } else {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
-
- /* set the converter state back into UConverter */
- cnv->fromUChar32= c<0 ? -c : 0;
- cnv->fromUnicodeStatus=(uint32_t)prev;
-
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
-}
-
-/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
-
-/**
- * Function for BOCU-1 decoder; handles multi-byte lead bytes.
- *
- * @param b lead byte;
- * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
- * @return (diff<<2)|count
- */
-static inline int32_t
-decodeBocu1LeadByte(int32_t b) {
- int32_t diff, count;
-
- if(b>=BOCU1_START_NEG_2) {
- /* positive difference */
- if(b<BOCU1_START_POS_3) {
- /* two bytes */
- diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- count=1;
- } else if(b<BOCU1_START_POS_4) {
- /* three bytes */
- diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
- count=2;
- } else {
- /* four bytes */
- diff=BOCU1_REACH_POS_3+1;
- count=3;
- }
- } else {
- /* negative difference */
- if(b>=BOCU1_START_NEG_3) {
- /* two bytes */
- diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- count=1;
- } else if(b>BOCU1_MIN) {
- /* three bytes */
- diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
- count=2;
- } else {
- /* four bytes */
- diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
- count=3;
- }
- }
-
- /* return the state for decoding the trail byte(s) */
- return (diff<<2)|count;
-}
-
-/**
- * BOCU-1 decoder helper: converts one trail byte of a multi-byte sequence
- * into the amount that it adds to the difference.
- *
- * @param count number of remaining trail bytes including this one
- * @param b trail byte
- * @return contribution of b to the difference, scaled by the position
- * that count implies; a negative value indicates an illegal trail byte
- */
-static inline int32_t
-decodeBocu1TrailByte(int32_t count, int32_t b) {
-    int32_t trail;
-
-    if(b>0x20) {
-#if BOCU1_MAX_TRAIL<0xff
-        if(b>BOCU1_MAX_TRAIL) {
-            return -99;
-        }
-#endif
-        trail=b-BOCU1_TRAIL_BYTE_OFFSET;
-    } else {
-        /*
-         * Byte values up to 0x20 go through the lookup table, which skips some
-         * C0 controls and makes the trail byte range contiguous.
-         * A negative table entry (illegal trail byte) keeps the result negative.
-         */
-        trail=bocu1ByteToTrail[b];
-    }
-
-    /* weight the trail value according to its position in the sequence */
-    switch(count) {
-    case 1:
-        return trail;
-    case 2:
-        return trail*BOCU1_TRAIL_COUNT;
-    default: /* count==3 */
-        return trail*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
-    }
-}
-
-/**
- * BOCU-1-to-Unicode conversion that also records source offsets.
- *
- * Decodes bytes from pArgs->source into UTF-16 code units at pArgs->target
- * and writes one source index to pArgs->offsets for each code unit written.
- * The decoder state (prev, the partial (diff<<2)|count of an incomplete
- * multi-byte sequence, and the bytes consumed for it) is loaded from and
- * stored back into the UConverter, so a sequence may span two calls.
- *
- * NOTE(review): offsets is written through unconditionally, so this variant
- * presumably is only called with a non-NULL pArgs->offsets - confirm against
- * the caller.
- *
- * @param pArgs conversion arguments; source, target and offsets are updated
- * @param pErrorCode set to U_BUFFER_OVERFLOW_ERROR when the target is full,
- * or to U_ILLEGAL_CHAR_FOUND for an illegal trail byte or a
- * decoded value outside 0..0x10ffff
- */
-static void U_CALLCONV
-_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
- int32_t *offsets;
-
- int32_t prev, count, diff, c;
-
- int8_t byteIndex;
- uint8_t *bytes;
-
- int32_t sourceIndex, nextSourceIndex;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
-
- /* get the converter state from UConverter */
- prev=(int32_t)cnv->toUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
- diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
- count=diff&3; /* low two bits: number of trail bytes still expected */
- diff>>=2; /* remaining bits: partial difference accumulated so far */
-
- byteIndex=cnv->toULength;
- bytes=cnv->toUBytes;
-
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=byteIndex==0 ? 0 : -1;
- nextSourceIndex=0;
-
- /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
- if(count>0 && byteIndex>0 && target<targetLimit) {
- /* continue a multi-byte sequence that was cut off by the previous buffer */
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use count as the only loop counter variable */
- diff=(int32_t)(sourceLimit-source);
- count=(int32_t)(pArgs->targetLimit-target);
- if(count>diff) {
- count=diff;
- }
- while(count>0) {
- if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- *offsets++=nextSourceIndex++;
- prev=BOCU1_SIMPLE_PREV(c);
- } else {
- /* leave this byte unconsumed; the regular loop below decodes it */
- break;
- }
- } else if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- *offsets++=nextSourceIndex++;
- } else {
- break;
- }
- ++source;
- --count;
- }
- sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
-
- /* decode a sequence of single and lead bytes */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- ++nextSourceIndex;
- c=*source++;
- if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
- /* Write a code point directly from a single-byte difference. */
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- prev=BOCU1_SIMPLE_PREV(c);
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- }
- /* c>=0x3000: fall out of the if-chain to the common output code below */
- } else if(c<=0x20) {
- /*
- * Direct-encoded C0 control code or space.
- * Reset prev for C0 control codes but not for space.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- sourceIndex=nextSourceIndex;
- continue;
- } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
- /*
- * Optimize two-byte case.
- * Only taken when the trail byte is available in this buffer;
- * otherwise the stateful multi-byte branch below handles the lead byte.
- */
- if(c>=BOCU1_MIDDLE) {
- diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- } else {
- diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- }
-
- /* trail byte */
- ++nextSourceIndex;
- c=decodeBocu1TrailByte(1, *source++);
- if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
- /* store both consumed bytes in cnv->toUBytes for the error report */
- bytes[0]=source[-2];
- bytes[1]=source[-1];
- byteIndex=2;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else if(c==BOCU1_RESET) {
- /* only reset the state, no code point */
- prev=BOCU1_ASCII_PREV;
- sourceIndex=nextSourceIndex;
- continue;
- } else {
- /*
- * For multi-byte difference lead bytes, set the decoder state
- * with the partial difference value from the lead byte and
- * with the number of trail bytes.
- */
- bytes[0]=(uint8_t)c;
- byteIndex=1;
-
- diff=decodeBocu1LeadByte(c);
- count=diff&3;
- diff>>=2;
-getTrail:
- for(;;) {
- if(source>=sourceLimit) {
- /* input ends inside the sequence; the state is saved at endloop */
- goto endloop;
- }
- ++nextSourceIndex;
- c=bytes[byteIndex++]=*source++;
-
- /* trail byte in any position */
- c=decodeBocu1TrailByte(count, c);
- if(c<0) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- diff+=c;
- if(--count==0) {
- /* final trail byte, deliver a code point */
- byteIndex=0;
- c=prev+diff;
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- break;
- }
- }
- }
-
- /* calculate the next prev and output c */
- prev=BOCU1_PREV(c);
- if(c<=0xffff) {
- *target++=(UChar)c;
- *offsets++=sourceIndex;
- } else {
- /* output surrogate pair */
- *target++=U16_LEAD(c);
- if(target<targetLimit) {
- *target++=U16_TRAIL(c);
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- } else {
- /* target overflow: the trail surrogate goes into the converter's overflow buffer */
- *offsets++=sourceIndex;
- cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- sourceIndex=nextSourceIndex;
- }
-endloop:
-
- if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
- /* set the converter state in UConverter to deal with the next character */
- cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
- cnv->mode=0;
- } else {
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=(uint32_t)prev;
- /*
- * diff is negative while a negative multi-byte difference is incomplete;
- * left-shifting a negative signed value is undefined behavior before
- * C++20, so shift the unsigned representation (same bit pattern).
- */
- cnv->mode=(int32_t)(((uint32_t)diff<<2)|(uint32_t)count);
- }
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
-}
-
-/*
- * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- *
- * Decodes BOCU-1 bytes from pArgs->source into UTF-16 code units at
- * pArgs->target. The decoder state (prev, the partial (diff<<2)|count of an
- * incomplete multi-byte sequence, and the bytes consumed for it) is loaded
- * from and stored back into the UConverter, so a sequence may span two calls.
- * *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR when the target is full, or
- * to U_ILLEGAL_CHAR_FOUND for an illegal trail byte or a decoded value
- * outside 0..0x10ffff.
- */
-static void U_CALLCONV
-_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- const uint8_t *source, *sourceLimit;
- UChar *target;
- const UChar *targetLimit;
-
- int32_t prev, count, diff, c;
-
- int8_t byteIndex;
- uint8_t *bytes;
-
- /* set up the local pointers */
- cnv=pArgs->converter;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
-
- /* get the converter state from UConverter */
- prev=(int32_t)cnv->toUnicodeStatus;
- if(prev==0) {
- prev=BOCU1_ASCII_PREV;
- }
- diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
- count=diff&3; /* low two bits: number of trail bytes still expected */
- diff>>=2; /* remaining bits: partial difference accumulated so far */
-
- byteIndex=cnv->toULength;
- bytes=cnv->toUBytes;
-
- /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
- if(count>0 && byteIndex>0 && target<targetLimit) {
- /* continue a multi-byte sequence that was cut off by the previous buffer */
- goto getTrail;
- }
-
-fastSingle:
- /* fast loop for single-byte differences */
- /* use count as the only loop counter variable */
- diff=(int32_t)(sourceLimit-source);
- count=(int32_t)(pArgs->targetLimit-target);
- if(count>diff) {
- count=diff;
- }
- while(count>0) {
- if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- prev=BOCU1_SIMPLE_PREV(c);
- } else {
- /* leave this byte unconsumed; the regular loop below decodes it */
- break;
- }
- } else if(c<=0x20) {
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- } else {
- break;
- }
- ++source;
- --count;
- }
-
- /* decode a sequence of single and lead bytes */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
-
- c=*source++;
- if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
- /* Write a code point directly from a single-byte difference. */
- c=prev+(c-BOCU1_MIDDLE);
- if(c<0x3000) {
- *target++=(UChar)c;
- prev=BOCU1_SIMPLE_PREV(c);
- goto fastSingle;
- }
- /* c>=0x3000: fall out of the if-chain to the common output code below */
- } else if(c<=0x20) {
- /*
- * Direct-encoded C0 control code or space.
- * Reset prev for C0 control codes but not for space.
- */
- if(c!=0x20) {
- prev=BOCU1_ASCII_PREV;
- }
- *target++=(UChar)c;
- continue;
- } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
- /*
- * Optimize two-byte case.
- * Only taken when the trail byte is available in this buffer;
- * otherwise the stateful multi-byte branch below handles the lead byte.
- */
- if(c>=BOCU1_MIDDLE) {
- diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
- } else {
- diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
- }
-
- /* trail byte */
- c=decodeBocu1TrailByte(1, *source++);
- if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
- /* store both consumed bytes in cnv->toUBytes for the error report */
- bytes[0]=source[-2];
- bytes[1]=source[-1];
- byteIndex=2;
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- break;
- }
- } else if(c==BOCU1_RESET) {
- /* only reset the state, no code point */
- prev=BOCU1_ASCII_PREV;
- continue;
- } else {
- /*
- * For multi-byte difference lead bytes, set the decoder state
- * with the partial difference value from the lead byte and
- * with the number of trail bytes.
- */
- bytes[0]=(uint8_t)c;
- byteIndex=1;
-
- diff=decodeBocu1LeadByte(c);
- count=diff&3;
- diff>>=2;
-getTrail:
- for(;;) {
- if(source>=sourceLimit) {
- /* input ends inside the sequence; the state is saved at endloop */
- goto endloop;
- }
- c=bytes[byteIndex++]=*source++;
-
- /* trail byte in any position */
- c=decodeBocu1TrailByte(count, c);
- if(c<0) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
-
- diff+=c;
- if(--count==0) {
- /* final trail byte, deliver a code point */
- byteIndex=0;
- c=prev+diff;
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- break;
- }
- }
- }
-
- /* calculate the next prev and output c */
- prev=BOCU1_PREV(c);
- if(c<=0xffff) {
- *target++=(UChar)c;
- } else {
- /* output surrogate pair */
- *target++=U16_LEAD(c);
- if(target<targetLimit) {
- *target++=U16_TRAIL(c);
- } else {
- /* target overflow: the trail surrogate goes into the converter's overflow buffer */
- cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- }
- }
-endloop:
-
- if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
- /* set the converter state in UConverter to deal with the next character */
- cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
- cnv->mode=0;
- } else {
- /* set the converter state back into UConverter */
- cnv->toUnicodeStatus=(uint32_t)prev;
- /*
- * diff is negative while a negative multi-byte difference is incomplete;
- * left-shifting a negative signed value is undefined behavior before
- * C++20, so shift the unsigned representation (same bit pattern).
- */
- cnv->mode=(int32_t)(((uint32_t)diff<<2)|(uint32_t)count);
- }
- cnv->toULength=byteIndex;
-
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- return;
-}
-
-/* miscellaneous ------------------------------------------------------------ */
-/*
- * Implementation table for the BOCU-1 converter.
- * It registers the four conversion functions of this file and
- * ucnv_getCompleteUnicodeSet; every other slot is NULL.
- * NOTE(review): the meaning of each positional slot comes from the
- * UConverterImpl declaration, which is not in this file - confirm there.
- */
-static const UConverterImpl _Bocu1Impl={
- UCNV_BOCU1,
-
- NULL,
- NULL,
-
- NULL,
- NULL,
- NULL,
-
- _Bocu1ToUnicode,
- _Bocu1ToUnicodeWithOffsets,
- _Bocu1FromUnicode,
- _Bocu1FromUnicodeWithOffsets,
- NULL,
-
- NULL,
- NULL,
- NULL,
- NULL,
- ucnv_getCompleteUnicodeSet,
-
- NULL,
- NULL
-};
-/*
- * Static description of the BOCU-1 converter: structure size, name, CCSID,
- * platform and converter type, minimum/maximum bytes per UChar,
- * and the substitution character bytes with their length.
- * NOTE(review): the fields without a comment below (FALSE, FALSE, 0, 0) are
- * positional; their meaning comes from the UConverterStaticData declaration,
- * which is not in this file - confirm there.
- */
-static const UConverterStaticData _Bocu1StaticData={
- sizeof(UConverterStaticData),
- "BOCU-1",
- 1214, /* CCSID for BOCU-1 */
- UCNV_IBM, UCNV_BOCU1,
- 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
- { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
- FALSE, FALSE,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
-};
-/*
- * Shared data object for the BOCU-1 converter, initialized from
- * _Bocu1StaticData and _Bocu1Impl.
- */
-const UConverterSharedData _Bocu1Data=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
-
-#endif
+
+/*
+ * Faster versions of packDiff() for single-byte-encoded diff values.
+ * These are macros that evaluate their argument more than once,
+ * so pass an expression without side effects.
+ */
+
+/**
+ * Is a diff value encodable in a single byte?
+ * True for BOCU1_REACH_NEG_1<=diff<=BOCU1_REACH_POS_1.
+ */
+#define DIFF_IS_SINGLE(diff) (BOCU1_REACH_NEG_1<=(diff) && (diff)<=BOCU1_REACH_POS_1)
+
+/**
+ * Encode a diff value in a single byte.
+ * The result is BOCU1_MIDDLE+diff; callers in this file check DIFF_IS_SINGLE(diff) first.
+ */
+#define PACK_SINGLE_DIFF(diff) (BOCU1_MIDDLE+(diff))
+
+/**
+ * Is a diff value encodable in two bytes?
+ * True for BOCU1_REACH_NEG_2<=diff<=BOCU1_REACH_POS_2.
+ * Callers in this file test DIFF_IS_SINGLE(diff) before this macro.
+ */
+#define DIFF_IS_DOUBLE(diff) (BOCU1_REACH_NEG_2<=(diff) && (diff)<=BOCU1_REACH_POS_2)
+
+/* BOCU-1 implementation functions ------------------------------------------ */
+/**
+ * Simple "previous" value for code point c:
+ * c with its low 7 bits cleared (start of its 0x80-block) plus BOCU1_ASCII_PREV.
+ */
+#define BOCU1_SIMPLE_PREV(c) (((c)&~0x7f)+BOCU1_ASCII_PREV)
+
+/**
+ * Determine the "previous" state value that the next difference
+ * is computed against, for the code point just processed.
+ *
+ * Only the range that needs special treatment is handled here;
+ * the BOCU1_PREV macro deals with everything else.
+ *
+ * @param c current code point, 0x3040..0xd7a3 (rest handled by macro below)
+ * @return "previous code point" state value
+ */
+static inline int32_t
+bocu1Prev(int32_t c) {
+    /* the lower bound 0x3040<=c is guaranteed by the caller */
+    if(c<=0x309f) {
+        /* Hiragana is not 128-aligned */
+        return 0x3070;
+    }
+    if(0x4e00<=c && c<=0x9fa5) {
+        /* CJK Unihan */
+        return 0x4e00-BOCU1_REACH_NEG_2;
+    }
+    /* the upper bound c<=0xd7a3 is guaranteed by the caller */
+    if(c>=0xac00) {
+        /* Korean Hangul */
+        return (0xd7a3+0xac00)/2;
+    }
+    /* mostly small scripts */
+    return BOCU1_SIMPLE_PREV(c);
+}
+
+/**
+ * Fast version of bocu1Prev() for most scripts.
+ * Uses BOCU1_SIMPLE_PREV(c) directly for c<0x3040 or c>0xd7a3 and
+ * calls bocu1Prev(c) only for the range in between.
+ * The argument is evaluated more than once.
+ */
+#define BOCU1_PREV(c) ((c)<0x3040 || (c)>0xd7a3 ? BOCU1_SIMPLE_PREV(c) : bocu1Prev(c))
+
+/*
+ * The BOCU-1 converter uses the standard setup code in ucnv.c/ucnv_bld.c.
+ * The UConverter fields are used as follows:
+ *
+ * fromUnicodeStatus encoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ *
+ * toUnicodeStatus decoder's prev (0 will be interpreted as BOCU1_ASCII_PREV)
+ * mode decoder's incomplete (diff<<2)|count (ignored when toULength==0)
+ */
+
+/* BOCU-1-from-Unicode conversion functions --------------------------------- */
+
+/**
+ * Encode a difference -0x10ffff..0x10ffff in 1..4 bytes
+ * and return a packed integer with them.
+ *
+ * The encoding favors small absolute differences with short encodings
+ * to compress runs of same-script characters.
+ *
+ * Optimized version with unrolled loops and fewer division/modulo operations
+ * than the standard packDiff(); all arithmetic here is integer arithmetic.
+ *
+ * Single-byte differences are not handled here (that branch is compiled
+ * out below); callers use DIFF_IS_SINGLE()/PACK_SINGLE_DIFF() for them,
+ * and the U_ASSERT checks that precondition.
+ *
+ * @param diff difference value -0x10ffff..0x10ffff
+ * @return
+ * 0x010000zz for 1-byte sequence zz
+ * 0x0200yyzz for 2-byte sequence yy zz
+ * 0x03xxyyzz for 3-byte sequence xx yy zz
+ * 0xwwxxyyzz for 4-byte sequence ww xx yy zz (ww>0x03)
+ */
+static int32_t
+packDiff(int32_t diff) {
+ int32_t result, m;
+
+ U_ASSERT(!DIFF_IS_SINGLE(diff)); /* assume we won't be called where diff==BOCU1_REACH_NEG_1=-64 */
+ if(diff>=BOCU1_REACH_NEG_1) {
+ /* mostly positive differences, and single-byte negative ones */
+#if 0 /* single-byte case handled in macros, see below */
+ if(diff<=BOCU1_REACH_POS_1) {
+ /* single byte */
+ return 0x01000000|(BOCU1_MIDDLE+diff);
+ } else
+#endif
+ if(diff<=BOCU1_REACH_POS_2) {
+ /* two bytes */
+ diff-=BOCU1_REACH_POS_1+1;
+ result=0x02000000; /* length 2 in the top byte */
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ result|=(BOCU1_START_POS_2+diff)<<8; /* lead byte */
+ } else if(diff<=BOCU1_REACH_POS_3) {
+ /* three bytes */
+ diff-=BOCU1_REACH_POS_2+1;
+ result=0x03000000; /* length 3 in the top byte */
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ result|=(BOCU1_START_POS_3+diff)<<16; /* lead byte */
+ } else {
+ /* four bytes */
+ diff-=BOCU1_REACH_POS_3+1;
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result=BOCU1_TRAIL_TO_BYTE(m);
+
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ /*
+ * We know that / and % would deliver quotient 0 and rest=diff.
+ * Avoid division and modulo for performance.
+ */
+ result|=BOCU1_TRAIL_TO_BYTE(diff)<<16;
+
+ result|=((uint32_t)BOCU1_START_POS_4)<<24; /* the lead byte takes the place of the length */
+ }
+ } else {
+ /* two- to four-byte negative differences */
+ if(diff>=BOCU1_REACH_NEG_2) {
+ /* two bytes */
+ diff-=BOCU1_REACH_NEG_1;
+ result=0x02000000; /* length 2 in the top byte */
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ result|=(BOCU1_START_NEG_2+diff)<<8; /* lead byte */
+ } else if(diff>=BOCU1_REACH_NEG_3) {
+ /* three bytes */
+ diff-=BOCU1_REACH_NEG_2;
+ result=0x03000000; /* length 3 in the top byte */
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m);
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ result|=(BOCU1_START_NEG_3+diff)<<16; /* lead byte */
+ } else {
+ /* four bytes */
+ diff-=BOCU1_REACH_NEG_3;
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result=BOCU1_TRAIL_TO_BYTE(m);
+
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<8;
+
+ /*
+ * We know that NEGDIVMOD would deliver
+ * quotient -1 and rest=diff+BOCU1_TRAIL_COUNT.
+ * Avoid division and modulo for performance.
+ */
+ m=diff+BOCU1_TRAIL_COUNT;
+ result|=BOCU1_TRAIL_TO_BYTE(m)<<16;
+
+ result|=BOCU1_MIN<<24; /* the lead byte takes the place of the length */
+ }
+ }
+ return result;
+}
+
+/*
+ * Unicode-to-BOCU-1 conversion that also records source offsets.
+ *
+ * Encodes UTF-16 code units from pArgs->source as BOCU-1 bytes at
+ * pArgs->target and writes one source index to pArgs->offsets for each
+ * byte written to the target. The encoder's prev value and a lead surrogate
+ * that ended the previous buffer are kept in the UConverter between calls.
+ * A lead surrogate that is not followed by a trail surrogate is encoded as is.
+ * Bytes of a sequence that do not fit into the target are stored in
+ * cnv->charErrorBuffer and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * NOTE(review): offsets is written through unconditionally, so this variant
+ * presumably is only called with a non-NULL pArgs->offsets - confirm against
+ * the caller.
+ */
+static void U_CALLCONV
+_Bocu1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+ int32_t *offsets;
+
+ int32_t prev, c, diff;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+ offsets=pArgs->offsets;
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32; /* non-zero: lead surrogate left over from the previous call */
+ prev=(int32_t)cnv->fromUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex= c==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion loop */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail; /* look for the trail surrogate of the pending lead surrogate */
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use only one loop counter variable, targetCapacity, not also source */
+ diff=(int32_t)(sourceLimit-source);
+ if(targetCapacity>diff) {
+ targetCapacity=diff; /* temporarily the minimum of output room and input length */
+ }
+ while(targetCapacity>0 && (c=*source)<0x3000) {
+ if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ *offsets++=nextSourceIndex++;
+ ++source;
+ --targetCapacity;
+ } else {
+ diff=c-prev;
+ if(DIFF_IS_SINGLE(diff)) {
+ prev=BOCU1_SIMPLE_PREV(c);
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ *offsets++=nextSourceIndex++;
+ ++source;
+ --targetCapacity;
+ } else {
+ break; /* multi-byte difference: left for the regular loop below */
+ }
+ }
+ }
+ /* restore real values */
+ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+ sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+ /* regular loop for all cases */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ c=*source++;
+ ++nextSourceIndex;
+
+ if(c<=0x20) {
+ /*
+ * ISO C0 control & space:
+ * Encode directly for MIME compatibility,
+ * and reset state except for space, to not disrupt compression.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ *offsets++=sourceIndex;
+ --targetCapacity;
+
+ sourceIndex=nextSourceIndex;
+ continue;
+ }
+
+ if(U16_IS_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ ++nextSourceIndex;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ }
+ } else {
+ /* no more input */
+ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+ break;
+ }
+ }
+
+ /*
+ * all other Unicode code points c==U+0021..U+10ffff
+ * are encoded with the difference c-prev
+ *
+ * a new prev is computed from c,
+ * placed in the middle of a 0x80-block (for most small scripts) or
+ * in the middle of the Unihan and Hangul blocks
+ * to statistically minimize the following difference
+ */
+ diff=c-prev;
+ prev=BOCU1_PREV(c);
+ if(DIFF_IS_SINGLE(diff)) {
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ *offsets++=sourceIndex;
+ --targetCapacity;
+ sourceIndex=nextSourceIndex;
+ if(c<0x3000) {
+ goto fastSingle;
+ }
+ } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+ /*
+ * optimize 2-byte case
+ * (only when both bytes fit; otherwise the general branch below
+ * splits the sequence between target and overflow buffer)
+ */
+ int32_t m;
+
+ if(diff>=0) {
+ diff-=BOCU1_REACH_POS_1+1;
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ diff+=BOCU1_START_POS_2;
+ } else {
+ diff-=BOCU1_REACH_NEG_1;
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ diff+=BOCU1_START_NEG_2;
+ }
+ *target++=(uint8_t)diff;
+ *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ targetCapacity-=2;
+ sourceIndex=nextSourceIndex;
+ } else {
+ int32_t length; /* will be 2..4 */
+
+ diff=packDiff(diff); /* from here on diff holds the packed output bytes */
+ length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+ /* write the output character bytes from diff and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(diff>>24);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ *offsets++=sourceIndex;
+ /* case 1: handled above */
+ *target++=(uint8_t)diff;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ targetCapacity-=length;
+ sourceIndex=nextSourceIndex;
+ } else {
+ uint8_t *charErrorBuffer;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length-=targetCapacity; /* now the number of bytes for the overflow buffer */
+ charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ *charErrorBuffer++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *charErrorBuffer++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *charErrorBuffer=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ diff>>=8*length; /* length was reduced by targetCapacity */
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)diff;
+ *offsets++=sourceIndex;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only for a pending lead surrogate (see "no more input") */
+ cnv->fromUnicodeStatus=(uint32_t)prev;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+ pArgs->offsets=offsets;
+}
+
+/*
+ * Identical to _Bocu1FromUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ *
+ * Encodes UTF-16 code units from pArgs->source as BOCU-1 bytes at
+ * pArgs->target. The encoder's prev value and a lead surrogate that ended
+ * the previous buffer are kept in the UConverter between calls.
+ * A lead surrogate that is not followed by a trail surrogate is encoded as is.
+ * Bytes of a sequence that do not fit into the target are stored in
+ * cnv->charErrorBuffer and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ */
+static void U_CALLCONV
+_Bocu1FromUnicode(UConverterFromUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const UChar *source, *sourceLimit;
+ uint8_t *target;
+ int32_t targetCapacity;
+
+ int32_t prev, c, diff;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=pArgs->source;
+ sourceLimit=pArgs->sourceLimit;
+ target=(uint8_t *)pArgs->target;
+ targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
+
+ /* get the converter state from UConverter */
+ c=cnv->fromUChar32; /* non-zero: lead surrogate left over from the previous call */
+ prev=(int32_t)cnv->fromUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+
+ /* conversion loop */
+ if(c!=0 && targetCapacity>0) {
+ goto getTrail; /* look for the trail surrogate of the pending lead surrogate */
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use only one loop counter variable, targetCapacity, not also source */
+ diff=(int32_t)(sourceLimit-source);
+ if(targetCapacity>diff) {
+ targetCapacity=diff; /* temporarily the minimum of output room and input length */
+ }
+ while(targetCapacity>0 && (c=*source)<0x3000) {
+ if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ } else {
+ diff=c-prev;
+ if(DIFF_IS_SINGLE(diff)) {
+ prev=BOCU1_SIMPLE_PREV(c);
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ } else {
+ break; /* multi-byte difference: left for the regular loop below */
+ }
+ }
+ ++source;
+ --targetCapacity;
+ }
+ /* restore real values */
+ targetCapacity=(int32_t)((const uint8_t *)pArgs->targetLimit-target);
+
+ /* regular loop for all cases */
+ while(source<sourceLimit) {
+ if(targetCapacity>0) {
+ c=*source++;
+
+ if(c<=0x20) {
+ /*
+ * ISO C0 control & space:
+ * Encode directly for MIME compatibility,
+ * and reset state except for space, to not disrupt compression.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(uint8_t)c;
+ --targetCapacity;
+ continue;
+ }
+
+ if(U16_IS_LEAD(c)) {
+getTrail:
+ if(source<sourceLimit) {
+ /* test the following code unit */
+ UChar trail=*source;
+ if(U16_IS_TRAIL(trail)) {
+ ++source;
+ c=U16_GET_SUPPLEMENTARY(c, trail);
+ }
+ } else {
+ /* no more input */
+ c=-c; /* negative lead surrogate as "incomplete" indicator to avoid c=0 everywhere else */
+ break;
+ }
+ }
+
+ /*
+ * all other Unicode code points c==U+0021..U+10ffff
+ * are encoded with the difference c-prev
+ *
+ * a new prev is computed from c,
+ * placed in the middle of a 0x80-block (for most small scripts) or
+ * in the middle of the Unihan and Hangul blocks
+ * to statistically minimize the following difference
+ */
+ diff=c-prev;
+ prev=BOCU1_PREV(c);
+ if(DIFF_IS_SINGLE(diff)) {
+ *target++=(uint8_t)PACK_SINGLE_DIFF(diff);
+ --targetCapacity;
+ if(c<0x3000) {
+ goto fastSingle;
+ }
+ } else if(DIFF_IS_DOUBLE(diff) && 2<=targetCapacity) {
+ /*
+ * optimize 2-byte case
+ * (only when both bytes fit; otherwise the general branch below
+ * splits the sequence between target and overflow buffer)
+ */
+ int32_t m;
+
+ if(diff>=0) {
+ diff-=BOCU1_REACH_POS_1+1;
+ m=diff%BOCU1_TRAIL_COUNT;
+ diff/=BOCU1_TRAIL_COUNT;
+ diff+=BOCU1_START_POS_2;
+ } else {
+ diff-=BOCU1_REACH_NEG_1;
+ NEGDIVMOD(diff, BOCU1_TRAIL_COUNT, m);
+ diff+=BOCU1_START_NEG_2;
+ }
+ *target++=(uint8_t)diff;
+ *target++=(uint8_t)BOCU1_TRAIL_TO_BYTE(m);
+ targetCapacity-=2;
+ } else {
+ int32_t length; /* will be 2..4 */
+
+ diff=packDiff(diff); /* from here on diff holds the packed output bytes */
+ length=BOCU1_LENGTH_FROM_PACKED(diff);
+
+ /* write the output character bytes from diff and length */
+ /* from the first if in the loop we know that targetCapacity>0 */
+ if(length<=targetCapacity) {
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 4:
+ *target++=(uint8_t)(diff>>24);
+ U_FALLTHROUGH;
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ /* case 2: handled above */
+ *target++=(uint8_t)(diff>>8);
+ /* case 1: handled above */
+ *target++=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ targetCapacity-=length;
+ } else {
+ uint8_t *charErrorBuffer;
+
+ /*
+ * We actually do this backwards here:
+ * In order to save an intermediate variable, we output
+ * first to the overflow buffer what does not fit into the
+ * regular target.
+ */
+ /* we know that 1<=targetCapacity<length<=4 */
+ length-=targetCapacity; /* now the number of bytes for the overflow buffer */
+ charErrorBuffer=(uint8_t *)cnv->charErrorBuffer;
+ switch(length) {
+ /* each branch falls through to the next one */
+ case 3:
+ *charErrorBuffer++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *charErrorBuffer++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *charErrorBuffer=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+ cnv->charErrorBufferLength=(int8_t)length;
+
+ /* now output what fits into the regular target */
+ diff>>=8*length; /* length was reduced by targetCapacity */
+ switch(targetCapacity) {
+ /* each branch falls through to the next one */
+ case 3:
+ *target++=(uint8_t)(diff>>16);
+ U_FALLTHROUGH;
+ case 2:
+ *target++=(uint8_t)(diff>>8);
+ U_FALLTHROUGH;
+ case 1:
+ *target++=(uint8_t)diff;
+ U_FALLTHROUGH;
+ default:
+ /* will never occur */
+ break;
+ }
+
+ /* target overflow */
+ targetCapacity=0;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ } else {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+
+ /* set the converter state back into UConverter */
+ cnv->fromUChar32= c<0 ? -c : 0; /* c<0 only for a pending lead surrogate (see "no more input") */
+ cnv->fromUnicodeStatus=(uint32_t)prev;
+
+ /* write back the updated pointers */
+ pArgs->source=source;
+ pArgs->target=(char *)target;
+}
+
+/* BOCU-1-to-Unicode conversion functions ----------------------------------- */
+
+/**
+ * Function for BOCU-1 decoder; handles multi-byte lead bytes.
+ *
+ * Maps a lead byte to the partial difference that it contributes and to the
+ * number of trail bytes that must follow, and packs both into one state value.
+ *
+ * @param b lead byte;
+ * BOCU1_MIN<=b<BOCU1_START_NEG_2 or BOCU1_START_POS_2<=b<BOCU1_MAX_LEAD
+ * @return (diff<<2)|count where count (1..3) is the number of trail bytes
+ * and diff is the partial difference, which is negative for the
+ * lead bytes below BOCU1_START_NEG_2
+ */
+static inline int32_t
+decodeBocu1LeadByte(int32_t b) {
+ int32_t diff, count;
+
+ if(b>=BOCU1_START_NEG_2) {
+ /* positive difference */
+ if(b<BOCU1_START_POS_3) {
+ /* two bytes */
+ diff=((int32_t)b-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ count=1;
+ } else if(b<BOCU1_START_POS_4) {
+ /* three bytes */
+ diff=((int32_t)b-BOCU1_START_POS_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_2+1;
+ count=2;
+ } else {
+ /* four bytes */
+ diff=BOCU1_REACH_POS_3+1;
+ count=3;
+ }
+ } else {
+ /* negative difference */
+ if(b>=BOCU1_START_NEG_3) {
+ /* two bytes */
+ diff=((int32_t)b-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ count=1;
+ } else if(b>BOCU1_MIN) {
+ /* three bytes */
+ diff=((int32_t)b-BOCU1_START_NEG_3)*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_2;
+ count=2;
+ } else {
+ /* four bytes */
+ diff=-BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_3;
+ count=3;
+ }
+ }
+
+ /*
+ * Return the state for decoding the trail byte(s).
+ * diff can be negative here, and left-shifting a negative signed value is
+ * undefined behavior before C++20, so do the shift on the unsigned
+ * representation; the resulting bit pattern is the same.
+ */
+ return (int32_t)(((uint32_t)diff<<2)|(uint32_t)count);
+}
+
+/**
+ * BOCU-1 decoder helper: converts one trail byte of a multi-byte sequence
+ * into the amount that it adds to the difference.
+ *
+ * @param count number of remaining trail bytes including this one
+ * @param b trail byte
+ * @return contribution of b to the difference, scaled by the position
+ * that count implies; a negative value indicates an illegal trail byte
+ */
+static inline int32_t
+decodeBocu1TrailByte(int32_t count, int32_t b) {
+    int32_t trail;
+
+    if(b>0x20) {
+#if BOCU1_MAX_TRAIL<0xff
+        if(b>BOCU1_MAX_TRAIL) {
+            return -99;
+        }
+#endif
+        trail=b-BOCU1_TRAIL_BYTE_OFFSET;
+    } else {
+        /*
+         * Byte values up to 0x20 go through the lookup table, which skips some
+         * C0 controls and makes the trail byte range contiguous.
+         * A negative table entry (illegal trail byte) keeps the result negative.
+         */
+        trail=bocu1ByteToTrail[b];
+    }
+
+    /* weight the trail value according to its position in the sequence */
+    switch(count) {
+    case 1:
+        return trail;
+    case 2:
+        return trail*BOCU1_TRAIL_COUNT;
+    default: /* count==3 */
+        return trail*(BOCU1_TRAIL_COUNT*BOCU1_TRAIL_COUNT);
+    }
+}
+
+/*
+ * Converts BOCU-1 bytes to UTF-16 and records, for every UChar written,
+ * the source index at which the corresponding byte sequence started
+ * (-1 if the sequence began in a previous buffer).
+ *
+ * Reads from pArgs->source up to pArgs->sourceLimit and writes to
+ * pArgs->target up to pArgs->targetLimit. The advanced source, target and
+ * offsets pointers are stored back into pArgs before returning.
+ *
+ * Decoder state kept in the converter between calls:
+ * - toUnicodeStatus: prev (0 is treated as BOCU1_ASCII_PREV)
+ * - mode: (partial difference<<2) | number of trail bytes still expected
+ * - toULength/toUBytes: bytes of a not-yet-complete multi-byte sequence
+ *
+ * Sets *pErrorCode to U_BUFFER_OVERFLOW_ERROR when the target is full while
+ * source bytes remain (or only the lead surrogate fit), and to
+ * U_ILLEGAL_CHAR_FOUND for an invalid trail byte or a decoded value
+ * outside 0..0x10ffff.
+ */
+static void U_CALLCONV
+_Bocu1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+ int32_t *offsets;
+
+ int32_t prev, count, diff, c;
+
+ int8_t byteIndex;
+ uint8_t *bytes;
+
+ int32_t sourceIndex, nextSourceIndex;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+ offsets=pArgs->offsets;
+
+ /* get the converter state from UConverter */
+ prev=(int32_t)cnv->toUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ /* unpack (diff<<2)|count, the same layout that is stored at endloop */
+ diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+ count=diff&3;
+ diff>>=2;
+
+ byteIndex=cnv->toULength;
+ bytes=cnv->toUBytes;
+
+ /* sourceIndex=-1 if the current character began in the previous buffer */
+ sourceIndex=byteIndex==0 ? 0 : -1;
+ nextSourceIndex=0;
+
+ /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+ /* resume a multi-byte sequence left incomplete by the previous call, if there is room for output */
+ if(count>0 && byteIndex>0 && target<targetLimit) {
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use count as the only loop counter variable */
+ /*
+ * diff and count are reused as scratch here: the loop runs at most
+ * min(remaining source bytes, remaining target units) times, so it needs
+ * no per-iteration limit checks.
+ * NOTE(review): if this point is reached with a pending multi-byte
+ * sequence (entry with count>0, byteIndex>0 but a full target), the
+ * unpacked diff/count are overwritten before being stored at endloop;
+ * confirm callers never invoke the function in that situation.
+ */
+ diff=(int32_t)(sourceLimit-source);
+ count=(int32_t)(pArgs->targetLimit-target);
+ if(count>diff) {
+ count=diff;
+ }
+ while(count>0) {
+ if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ *offsets++=nextSourceIndex++;
+ prev=BOCU1_SIMPLE_PREV(c);
+ } else {
+ /* leave the byte unconsumed; the general loop below handles c>=0x3000 */
+ break;
+ }
+ } else if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ *offsets++=nextSourceIndex++;
+ } else {
+ /* any other byte is left unconsumed for the general loop below */
+ break;
+ }
+ ++source;
+ --count;
+ }
+ sourceIndex=nextSourceIndex; /* wrong if offsets==NULL but does not matter */
+
+ /* decode a sequence of single and lead bytes */
+ while(source<sourceLimit) {
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ ++nextSourceIndex;
+ c=*source++;
+ if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+ /* Write a code point directly from a single-byte difference. */
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ prev=BOCU1_SIMPLE_PREV(c);
+ sourceIndex=nextSourceIndex;
+ goto fastSingle;
+ }
+ /* c>=0x3000: fall through to the common output code after this if-chain */
+ } else if(c<=0x20) {
+ /*
+ * Direct-encoded C0 control code or space.
+ * Reset prev for C0 control codes but not for space.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+ /* Optimize two-byte case. */
+ /* taken only when the trail byte is available in this buffer; otherwise the stateful branch below is used */
+ if(c>=BOCU1_MIDDLE) {
+ diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ } else {
+ diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ }
+
+ /* trail byte */
+ ++nextSourceIndex;
+ c=decodeBocu1TrailByte(1, *source++);
+ /* invalid trail byte or result out of range: store both bytes in toUBytes and stop */
+ if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+ bytes[0]=source[-2];
+ bytes[1]=source[-1];
+ byteIndex=2;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else if(c==BOCU1_RESET) {
+ /* only reset the state, no code point */
+ prev=BOCU1_ASCII_PREV;
+ sourceIndex=nextSourceIndex;
+ continue;
+ } else {
+ /*
+ * For multi-byte difference lead bytes, set the decoder state
+ * with the partial difference value from the lead byte and
+ * with the number of trail bytes.
+ */
+ bytes[0]=(uint8_t)c;
+ byteIndex=1;
+
+ diff=decodeBocu1LeadByte(c);
+ count=diff&3;
+ diff>>=2;
+getTrail:
+ for(;;) {
+ if(source>=sourceLimit) {
+ /* input exhausted mid-sequence: diff, count and bytes are saved at endloop */
+ goto endloop;
+ }
+ ++nextSourceIndex;
+ c=bytes[byteIndex++]=*source++;
+
+ /* trail byte in any position */
+ c=decodeBocu1TrailByte(count, c);
+ if(c<0) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ diff+=c;
+ if(--count==0) {
+ /* final trail byte, deliver a code point */
+ byteIndex=0;
+ c=prev+diff;
+ if((uint32_t)c>0x10ffff) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ break;
+ }
+ }
+ }
+
+ /* calculate the next prev and output c */
+ prev=BOCU1_PREV(c);
+ if(c<=0xffff) {
+ *target++=(UChar)c;
+ *offsets++=sourceIndex;
+ } else {
+ /* output surrogate pair */
+ *target++=U16_LEAD(c);
+ if(target<targetLimit) {
+ *target++=U16_TRAIL(c);
+ *offsets++=sourceIndex;
+ *offsets++=sourceIndex;
+ } else {
+ /* target overflow */
+ /* only the lead surrogate fit: park the trail surrogate in the converter's UCharErrorBuffer */
+ *offsets++=sourceIndex;
+ cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ sourceIndex=nextSourceIndex;
+ }
+endloop:
+
+ if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+ /* set the converter state in UConverter to deal with the next character */
+ cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+ cnv->mode=0;
+ } else {
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=(uint32_t)prev;
+ cnv->mode=(diff<<2)|count;
+ }
+ /* number of bytes currently held in toUBytes (0 when no sequence is pending) */
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ pArgs->offsets=offsets;
+ return;
+}
+
+/*
+ * Identical to _Bocu1ToUnicodeWithOffsets but without offset handling.
+ * If a change is made in the original function, then either
+ * change this function the same way or
+ * re-copy the original function and remove the variables
+ * offsets, sourceIndex, and nextSourceIndex.
+ */
+/*
+ * Converts BOCU-1 bytes to UTF-16 without producing offsets.
+ *
+ * Reads from pArgs->source up to pArgs->sourceLimit and writes to
+ * pArgs->target up to pArgs->targetLimit. The advanced source and target
+ * pointers are stored back into pArgs before returning.
+ *
+ * Decoder state kept in the converter between calls:
+ * - toUnicodeStatus: prev (0 is treated as BOCU1_ASCII_PREV)
+ * - mode: (partial difference<<2) | number of trail bytes still expected
+ * - toULength/toUBytes: bytes of a not-yet-complete multi-byte sequence
+ *
+ * Sets *pErrorCode to U_BUFFER_OVERFLOW_ERROR when the target is full while
+ * source bytes remain (or only the lead surrogate fit), and to
+ * U_ILLEGAL_CHAR_FOUND for an invalid trail byte or a decoded value
+ * outside 0..0x10ffff.
+ */
+static void U_CALLCONV
+_Bocu1ToUnicode(UConverterToUnicodeArgs *pArgs,
+ UErrorCode *pErrorCode) {
+ UConverter *cnv;
+ const uint8_t *source, *sourceLimit;
+ UChar *target;
+ const UChar *targetLimit;
+
+ int32_t prev, count, diff, c;
+
+ int8_t byteIndex;
+ uint8_t *bytes;
+
+ /* set up the local pointers */
+ cnv=pArgs->converter;
+ source=(const uint8_t *)pArgs->source;
+ sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+ target=pArgs->target;
+ targetLimit=pArgs->targetLimit;
+
+ /* get the converter state from UConverter */
+ prev=(int32_t)cnv->toUnicodeStatus;
+ if(prev==0) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ /* unpack (diff<<2)|count, the same layout that is stored at endloop */
+ diff=cnv->mode; /* mode may be set to UCNV_SI by ucnv_bld.c but then toULength==0 */
+ count=diff&3;
+ diff>>=2;
+
+ byteIndex=cnv->toULength;
+ bytes=cnv->toUBytes;
+
+ /* conversion "loop" similar to _SCSUToUnicodeWithOffsets() */
+ /* resume a multi-byte sequence left incomplete by the previous call, if there is room for output */
+ if(count>0 && byteIndex>0 && target<targetLimit) {
+ goto getTrail;
+ }
+
+fastSingle:
+ /* fast loop for single-byte differences */
+ /* use count as the only loop counter variable */
+ /*
+ * diff and count are reused as scratch here: the loop runs at most
+ * min(remaining source bytes, remaining target units) times, so it needs
+ * no per-iteration limit checks.
+ * NOTE(review): if this point is reached with a pending multi-byte
+ * sequence (entry with count>0, byteIndex>0 but a full target), the
+ * unpacked diff/count are overwritten before being stored at endloop;
+ * confirm callers never invoke the function in that situation.
+ */
+ diff=(int32_t)(sourceLimit-source);
+ count=(int32_t)(pArgs->targetLimit-target);
+ if(count>diff) {
+ count=diff;
+ }
+ while(count>0) {
+ if(BOCU1_START_NEG_2<=(c=*source) && c<BOCU1_START_POS_2) {
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ prev=BOCU1_SIMPLE_PREV(c);
+ } else {
+ /* leave the byte unconsumed; the general loop below handles c>=0x3000 */
+ break;
+ }
+ } else if(c<=0x20) {
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ } else {
+ /* any other byte is left unconsumed for the general loop below */
+ break;
+ }
+ ++source;
+ --count;
+ }
+
+ /* decode a sequence of single and lead bytes */
+ while(source<sourceLimit) {
+ if(target>=targetLimit) {
+ /* target is full */
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+
+ c=*source++;
+ if(BOCU1_START_NEG_2<=c && c<BOCU1_START_POS_2) {
+ /* Write a code point directly from a single-byte difference. */
+ c=prev+(c-BOCU1_MIDDLE);
+ if(c<0x3000) {
+ *target++=(UChar)c;
+ prev=BOCU1_SIMPLE_PREV(c);
+ goto fastSingle;
+ }
+ /* c>=0x3000: fall through to the common output code after this if-chain */
+ } else if(c<=0x20) {
+ /*
+ * Direct-encoded C0 control code or space.
+ * Reset prev for C0 control codes but not for space.
+ */
+ if(c!=0x20) {
+ prev=BOCU1_ASCII_PREV;
+ }
+ *target++=(UChar)c;
+ continue;
+ } else if(BOCU1_START_NEG_3<=c && c<BOCU1_START_POS_3 && source<sourceLimit) {
+ /* Optimize two-byte case. */
+ /* taken only when the trail byte is available in this buffer; otherwise the stateful branch below is used */
+ if(c>=BOCU1_MIDDLE) {
+ diff=((int32_t)c-BOCU1_START_POS_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_POS_1+1;
+ } else {
+ diff=((int32_t)c-BOCU1_START_NEG_2)*BOCU1_TRAIL_COUNT+BOCU1_REACH_NEG_1;
+ }
+
+ /* trail byte */
+ c=decodeBocu1TrailByte(1, *source++);
+ /* invalid trail byte or result out of range: store both bytes in toUBytes and stop */
+ if(c<0 || (uint32_t)(c=prev+diff+c)>0x10ffff) {
+ bytes[0]=source[-2];
+ bytes[1]=source[-1];
+ byteIndex=2;
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ break;
+ }
+ } else if(c==BOCU1_RESET) {
+ /* only reset the state, no code point */
+ prev=BOCU1_ASCII_PREV;
+ continue;
+ } else {
+ /*
+ * For multi-byte difference lead bytes, set the decoder state
+ * with the partial difference value from the lead byte and
+ * with the number of trail bytes.
+ */
+ bytes[0]=(uint8_t)c;
+ byteIndex=1;
+
+ diff=decodeBocu1LeadByte(c);
+ count=diff&3;
+ diff>>=2;
+getTrail:
+ for(;;) {
+ if(source>=sourceLimit) {
+ /* input exhausted mid-sequence: diff, count and bytes are saved at endloop */
+ goto endloop;
+ }
+ c=bytes[byteIndex++]=*source++;
+
+ /* trail byte in any position */
+ c=decodeBocu1TrailByte(count, c);
+ if(c<0) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+
+ diff+=c;
+ if(--count==0) {
+ /* final trail byte, deliver a code point */
+ byteIndex=0;
+ c=prev+diff;
+ if((uint32_t)c>0x10ffff) {
+ *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+ goto endloop;
+ }
+ break;
+ }
+ }
+ }
+
+ /* calculate the next prev and output c */
+ prev=BOCU1_PREV(c);
+ if(c<=0xffff) {
+ *target++=(UChar)c;
+ } else {
+ /* output surrogate pair */
+ *target++=U16_LEAD(c);
+ if(target<targetLimit) {
+ *target++=U16_TRAIL(c);
+ } else {
+ /* target overflow */
+ /* only the lead surrogate fit: park the trail surrogate in the converter's UCharErrorBuffer */
+ cnv->UCharErrorBuffer[0]=U16_TRAIL(c);
+ cnv->UCharErrorBufferLength=1;
+ *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+ break;
+ }
+ }
+ }
+endloop:
+
+ if(*pErrorCode==U_ILLEGAL_CHAR_FOUND) {
+ /* set the converter state in UConverter to deal with the next character */
+ cnv->toUnicodeStatus=BOCU1_ASCII_PREV;
+ cnv->mode=0;
+ } else {
+ /* set the converter state back into UConverter */
+ cnv->toUnicodeStatus=(uint32_t)prev;
+ cnv->mode=(diff<<2)|count;
+ }
+ /* number of bytes currently held in toUBytes (0 when no sequence is pending) */
+ cnv->toULength=byteIndex;
+
+ /* write back the updated pointers */
+ pArgs->source=(const char *)source;
+ pArgs->target=target;
+ return;
+}
+
+/* miscellaneous ------------------------------------------------------------ */
+
+/*
+ * Implementation table for the BOCU-1 converter.
+ * The only non-NULL entries are the converter type, the four conversion
+ * functions defined in this file (to/from Unicode, each with and without
+ * offsets) and ucnv_getCompleteUnicodeSet; every other slot is left NULL.
+ * NOTE(review): slot meanings follow the field order of UConverterImpl,
+ * which is declared outside this file section; confirm against that
+ * declaration before reordering.
+ */
+static const UConverterImpl _Bocu1Impl={
+ UCNV_BOCU1,
+
+ NULL,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+
+ _Bocu1ToUnicode,
+ _Bocu1ToUnicodeWithOffsets,
+ _Bocu1FromUnicode,
+ _Bocu1FromUnicodeWithOffsets,
+ NULL,
+
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ucnv_getCompleteUnicodeSet,
+
+ NULL,
+ NULL
+};
+
+/*
+ * Static description of the BOCU-1 converter: structure size, name, CCSID,
+ * platform and converter type, and the minimum/maximum bytes per UChar.
+ * NOTE(review): the meaning of the unlabeled fields follows the field order
+ * of UConverterStaticData, which is declared outside this file section.
+ */
+static const UConverterStaticData _Bocu1StaticData={
+ sizeof(UConverterStaticData),
+ "BOCU-1",
+ 1214, /* CCSID for BOCU-1 */
+ UCNV_IBM, UCNV_BOCU1,
+ 1, 4, /* one UChar generates at least 1 byte and at most 4 bytes */
+ { 0x1a, 0, 0, 0 }, 1, /* BOCU-1 never needs to write a subchar */
+ FALSE, FALSE,
+ 0,
+ 0,
+ { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+/*
+ * Shared data object for BOCU-1 with external linkage, built from the
+ * static description and the implementation table defined in this file.
+ */
+const UConverterSharedData _Bocu1Data=
+ UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Bocu1StaticData, &_Bocu1Impl);
+
+#endif