Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 2 of 2.

author: mcheshkov <mcheshkov@yandex-team.ru> 2022-02-10 16:46:16 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:16 +0300
commit: 1312621288956f199a5bd5342b0133d4395fa725 (patch)
tree: 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/ucnv_u16.cpp
parent: e9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff)
download: ydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz
1 files changed, 1577 insertions, 1577 deletions
diff --git a/contrib/libs/icu/common/ucnv_u16.cpp b/contrib/libs/icu/common/ucnv_u16.cpp
index 0f76feb25e..a5e8367400 100644
--- a/contrib/libs/icu/common/ucnv_u16.cpp
+++ b/contrib/libs/icu/common/ucnv_u16.cpp
@@ -1,1579 +1,1579 @@
-// © 2016 and later: Unicode, Inc. and others. 
-// License & terms of use: http://www.unicode.org/copyright.html 
-/*   
-********************************************************************** 
-*   Copyright (C) 2002-2015, International Business Machines 
-*   Corporation and others.  All Rights Reserved. 
-********************************************************************** 
-*   file name:  ucnv_u16.c 
-*   encoding:   UTF-8 
-*   tab size:   8 (not used) 
-*   indentation:4 
-* 
-*   created on: 2002jul01 
-*   created by: Markus W. Scherer 
-* 
-*   UTF-16 converter implementation. Used to be in ucnv_utf.c. 
-*/ 
- 
-#include "unicode/utypes.h" 
- 
-#if !UCONFIG_NO_CONVERSION 
- 
-#include "unicode/ucnv.h" 
-#include "unicode/uversion.h" 
-#include "ucnv_bld.h" 
-#include "ucnv_cnv.h" 
-#include "cmemory.h" 
- 
-enum { 
-    UCNV_NEED_TO_WRITE_BOM=1 
-}; 
- 
-U_CDECL_BEGIN 
-/* 
- * The UTF-16 toUnicode implementation is also used for the Java-specific 
- * "with BOM" variants of UTF-16BE and UTF-16LE. 
- */ 
-static void  U_CALLCONV 
-_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 
-                           UErrorCode *pErrorCode); 
- 
-/* UTF-16BE ----------------------------------------------------------------- */ 
- 
-#if U_IS_BIG_ENDIAN 
-#   define _UTF16PEFromUnicodeWithOffsets   _UTF16BEFromUnicodeWithOffsets 
-#else 
-#   define _UTF16PEFromUnicodeWithOffsets   _UTF16LEFromUnicodeWithOffsets 
-#endif 
- 
- 
-static void  U_CALLCONV 
-_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 
-                               UErrorCode *pErrorCode) { 
-    UConverter *cnv; 
-    const UChar *source; 
-    char *target; 
-    int32_t *offsets; 
- 
-    uint32_t targetCapacity, length, sourceIndex; 
-    UChar c, trail; 
-    char overflow[4]; 
- 
-    source=pArgs->source; 
-    length=(int32_t)(pArgs->sourceLimit-source); 
-    if(length<=0) { 
-        /* no input, nothing to do */ 
-        return; 
-    } 
- 
-    cnv=pArgs->converter; 
- 
-    /* write the BOM if necessary */ 
-    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 
-        static const char bom[]={ (char)0xfeu, (char)0xffu }; 
-        ucnv_fromUWriteBytes(cnv, 
-                             bom, 2, 
-                             &pArgs->target, pArgs->targetLimit, 
-                             &pArgs->offsets, -1, 
-                             pErrorCode); 
-        cnv->fromUnicodeStatus=0; 
-    } 
- 
-    target=pArgs->target; 
-    if(target >= pArgs->targetLimit) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-        return; 
-    } 
- 
-    targetCapacity=(uint32_t)(pArgs->targetLimit-target); 
-    offsets=pArgs->offsets; 
-    sourceIndex=0; 
- 
-    /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 
- 
-    if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 
-        /* the last buffer ended with a lead surrogate, output the surrogate pair */ 
-        ++source; 
-        --length; 
-        target[0]=(uint8_t)(c>>8); 
-        target[1]=(uint8_t)c; 
-        target[2]=(uint8_t)(trail>>8); 
-        target[3]=(uint8_t)trail; 
-        target+=4; 
-        targetCapacity-=4; 
-        if(offsets!=NULL) { 
-            *offsets++=-1; 
-            *offsets++=-1; 
-            *offsets++=-1; 
-            *offsets++=-1; 
-        } 
-        sourceIndex=1; 
-        cnv->fromUChar32=c=0; 
-    } 
- 
-    if(c==0) { 
-        /* copy an even number of bytes for complete UChars */ 
-        uint32_t count=2*length; 
-        if(count>targetCapacity) { 
-            count=targetCapacity&~1; 
-        } 
-        /* count is even */ 
-        targetCapacity-=count; 
-        count>>=1; 
-        length-=count; 
- 
-        if(offsets==NULL) { 
-            while(count>0) { 
-                c=*source++; 
-                if(U16_IS_SINGLE(c)) { 
-                    target[0]=(uint8_t)(c>>8); 
-                    target[1]=(uint8_t)c; 
-                    target+=2; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 
-                    ++source; 
-                    --count; 
-                    target[0]=(uint8_t)(c>>8); 
-                    target[1]=(uint8_t)c; 
-                    target[2]=(uint8_t)(trail>>8); 
-                    target[3]=(uint8_t)trail; 
-                    target+=4; 
-                } else { 
-                    break; 
-                } 
-                --count; 
-            } 
-        } else { 
-            while(count>0) { 
-                c=*source++; 
-                if(U16_IS_SINGLE(c)) { 
-                    target[0]=(uint8_t)(c>>8); 
-                    target[1]=(uint8_t)c; 
-                    target+=2; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex++; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 
-                    ++source; 
-                    --count; 
-                    target[0]=(uint8_t)(c>>8); 
-                    target[1]=(uint8_t)c; 
-                    target[2]=(uint8_t)(trail>>8); 
-                    target[3]=(uint8_t)trail; 
-                    target+=4; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=2; 
-                } else { 
-                    break; 
-                } 
-                --count; 
-            } 
-        } 
- 
-        if(count==0) { 
-            /* done with the loop for complete UChars */ 
-            if(length>0 && targetCapacity>0) { 
-                /* 
-                 * there is more input and some target capacity - 
-                 * it must be targetCapacity==1 because otherwise 
-                 * the above would have copied more; 
-                 * prepare for overflow output 
-                 */ 
-                if(U16_IS_SINGLE(c=*source++)) { 
-                    overflow[0]=(char)(c>>8); 
-                    overflow[1]=(char)c; 
-                    length=2; /* 2 bytes to output */ 
-                    c=0; 
-                /* } else { keep c for surrogate handling, length will be set there */ 
-                } 
-            } else { 
-                length=0; 
-                c=0; 
-            } 
-        } else { 
-            /* keep c for surrogate handling, length will be set there */ 
-            targetCapacity+=2*count; 
-        } 
-    } else { 
-        length=0; /* from here on, length counts the bytes in overflow[] */ 
-    } 
-     
-    if(c!=0) { 
-        /* 
-         * c is a surrogate, and 
-         * - source or target too short 
-         * - or the surrogate is unmatched 
-         */ 
-        length=0; 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(source<pArgs->sourceLimit) { 
-                if(U16_IS_TRAIL(trail=*source)) { 
-                    /* output the surrogate pair, will overflow (see conditions comment above) */ 
-                    ++source; 
-                    overflow[0]=(char)(c>>8); 
-                    overflow[1]=(char)c; 
-                    overflow[2]=(char)(trail>>8); 
-                    overflow[3]=(char)trail; 
-                    length=4; /* 4 bytes to output */ 
-                    c=0; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-                } 
-            } else { 
-                /* see if the trail surrogate is in the next buffer */ 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-        } 
-        cnv->fromUChar32=c; 
-    } 
- 
-    if(length>0) { 
-        /* output length bytes with overflow (length>targetCapacity>0) */ 
-        ucnv_fromUWriteBytes(cnv, 
-                             overflow, length, 
-                             (char **)&target, pArgs->targetLimit, 
-                             &offsets, sourceIndex, 
-                             pErrorCode); 
-        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 
-    } 
- 
-    if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-    } 
- 
-    /* write back the updated pointers */ 
-    pArgs->source=source; 
-    pArgs->target=(char *)target; 
-    pArgs->offsets=offsets; 
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*  
+**********************************************************************
+*   Copyright (C) 2002-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  ucnv_u16.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002jul01
+*   created by: Markus W. Scherer
+*
+*   UTF-16 converter implementation. Used to be in ucnv_utf.c.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/uversion.h"
+#include "ucnv_bld.h"
+#include "ucnv_cnv.h"
+#include "cmemory.h"
+
+enum {
+    UCNV_NEED_TO_WRITE_BOM=1 /* value of cnv->fromUnicodeStatus that makes the fromUnicode code emit the BOM first */
+};
+
+U_CDECL_BEGIN
+/*
+ * The UTF-16 toUnicode implementation is also used for the Java-specific
+ * "with BOM" variants of UTF-16BE and UTF-16LE.
+ *
+ * Forward declaration: the UTF-16BE toUnicode function in this file
+ * forwards to it while the converter's mode is below 8.
+ */
+static void  U_CALLCONV
+_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode);
+
+/* UTF-16BE ----------------------------------------------------------------- */
+
+#if U_IS_BIG_ENDIAN /* alias the "PE" fromUnicode name to the BE or LE function depending on U_IS_BIG_ENDIAN */
+#   define _UTF16PEFromUnicodeWithOffsets   _UTF16BEFromUnicodeWithOffsets
+#else
+#   define _UTF16PEFromUnicodeWithOffsets   _UTF16LEFromUnicodeWithOffsets
+#endif /* U_IS_BIG_ENDIAN */
+
+/*
+ * UTF-16BE fromUnicode conversion: writes each UChar from pArgs->source to
+ * pArgs->target as two bytes, high byte first, and, when pArgs->offsets is
+ * not NULL, records one source index per output byte.
+ *
+ * - If cnv->fromUnicodeStatus is UCNV_NEED_TO_WRITE_BOM, the bytes FE FF are
+ *   written first (passing -1 as their source index) and the status is
+ *   reset to 0.
+ * - A lead surrogate at the end of the input is kept in cnv->fromUChar32 and
+ *   completed from the start of the next call's input.
+ * - Output that does not fit completely into the remaining target is
+ *   staged in overflow[] and handed to ucnv_fromUWriteBytes().
+ * - Sets *pErrorCode to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate and
+ *   to U_BUFFER_OVERFLOW_ERROR when input remains but the target is full.
+ * - On the normal exit path, the advanced source/target/offsets pointers
+ *   are written back into pArgs.
+ */
+static void  U_CALLCONV
+_UTF16BEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+                               UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const UChar *source;
+    char *target;
+    int32_t *offsets;
+
+    uint32_t targetCapacity, length, sourceIndex;
+    UChar c, trail;
+    char overflow[4];
+
+    source=pArgs->source;
+    length=(int32_t)(pArgs->sourceLimit-source);
+    if(length<=0) {
+        /* no input, nothing to do
+         * NOTE(review): length is uint32_t, so this test is only true for
+         * length==0; a negative pointer difference would wrap to a large
+         * value instead of being caught here - confirm that callers never
+         * pass sourceLimit<source. */
+        return;
+    }
+
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xfeu, (char)0xffu };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
+    offsets=pArgs->offsets;
+    sourceIndex=0;
+
+    /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+    if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
+        /* the last buffer ended with a lead surrogate, output the surrogate pair;
+         * all four of its output bytes get the offset -1 */
+        ++source;
+        --length;
+        target[0]=(uint8_t)(c>>8);
+        target[1]=(uint8_t)c;
+        target[2]=(uint8_t)(trail>>8);
+        target[3]=(uint8_t)trail;
+        target+=4;
+        targetCapacity-=4;
+        if(offsets!=NULL) {
+            *offsets++=-1;
+            *offsets++=-1;
+            *offsets++=-1;
+            *offsets++=-1;
+        }
+        sourceIndex=1; /* the trail unit consumed above was source index 0 */
+        cnv->fromUChar32=c=0;
+    }
+
+    if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
+        targetCapacity-=count;
+        count>>=1; /* from here on, count is a number of UChars, not bytes */
+        length-=count; /* UChars that are not covered by the loops below */
+
+        if(offsets==NULL) {
+            while(count>0) {
+                c=*source++;
+                if(U16_IS_SINGLE(c)) {
+                    target[0]=(uint8_t)(c>>8);
+                    target[1]=(uint8_t)c;
+                    target+=2;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+                    ++source;
+                    --count;
+                    target[0]=(uint8_t)(c>>8);
+                    target[1]=(uint8_t)c;
+                    target[2]=(uint8_t)(trail>>8);
+                    target[3]=(uint8_t)trail;
+                    target+=4;
+                } else {
+                    break; /* surrogate that cannot be paired within count: leave with c!=0 */
+                }
+                --count;
+            }
+        } else {
+            while(count>0) {
+                c=*source++;
+                if(U16_IS_SINGLE(c)) {
+                    target[0]=(uint8_t)(c>>8);
+                    target[1]=(uint8_t)c;
+                    target+=2;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex++;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+                    ++source;
+                    --count;
+                    target[0]=(uint8_t)(c>>8);
+                    target[1]=(uint8_t)c;
+                    target[2]=(uint8_t)(trail>>8);
+                    target[3]=(uint8_t)trail;
+                    target+=4;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=2;
+                } else {
+                    break; /* surrogate that cannot be paired within count: leave with c!=0 */
+                }
+                --count;
+            }
+        }
+
+        if(count==0) {
+            /* done with the loop for complete UChars */
+            if(length>0 && targetCapacity>0) {
+                /*
+                 * there is more input and some target capacity -
+                 * it must be targetCapacity==1 because otherwise
+                 * the above would have copied more;
+                 * prepare for overflow output
+                 */
+                if(U16_IS_SINGLE(c=*source++)) {
+                    overflow[0]=(char)(c>>8);
+                    overflow[1]=(char)c;
+                    length=2; /* 2 bytes to output */
+                    c=0;
+                /* } else { keep c for surrogate handling, length will be set there */
+                }
+            } else {
+                length=0;
+                c=0;
+            }
+        } else {
+            /* keep c for surrogate handling, length will be set there */
+            targetCapacity+=2*count; /* give back the bytes reserved for the units not written */
+        }
+    } else {
+        length=0; /* from here on, length counts the bytes in overflow[] */
+    }
+    
+    if(c!=0) {
+        /*
+         * c is a surrogate, and
+         * - source or target too short
+         * - or the surrogate is unmatched
+         */
+        length=0;
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(source<pArgs->sourceLimit) {
+                if(U16_IS_TRAIL(trail=*source)) {
+                    /* output the surrogate pair, will overflow (see conditions comment above) */
+                    ++source;
+                    overflow[0]=(char)(c>>8);
+                    overflow[1]=(char)c;
+                    overflow[2]=(char)(trail>>8);
+                    overflow[3]=(char)trail;
+                    length=4; /* 4 bytes to output */
+                    c=0;
+                } else {
+                    /* unmatched lead surrogate */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                }
+            } else {
+                /* see if the trail surrogate is in the next buffer */
+            }
+        } else {
+            /* unmatched trail surrogate */
+            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+        }
+        cnv->fromUChar32=c; /* 0 if the pair was completed above, else the pending or unmatched surrogate */
+    }
+
+    if(length>0) {
+        /* output length bytes with overflow (length>targetCapacity>0) */
+        ucnv_fromUWriteBytes(cnv,
+                             overflow, length,
+                             (char **)&target, pArgs->targetLimit,
+                             &offsets, sourceIndex,
+                             pErrorCode);
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
+    }
+
+    if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    /* write back the updated pointers */
+    pArgs->source=source;
+    pArgs->target=(char *)target;
+    pArgs->offsets=offsets;
+}
+/*
+ * UTF-16BE toUnicode conversion: combines pairs of source bytes, high byte
+ * first, into UChars written to pArgs->target and, when pArgs->offsets is
+ * not NULL, records a source byte index for each UChar written.
+ *
+ * - While pArgs->converter->mode<8 the call is forwarded to
+ *   _UTF16ToUnicodeWithOffsets() instead.
+ * - Bytes of an incomplete UChar or surrogate pair are carried between calls
+ *   in cnv->toUBytes[]/cnv->toULength (and one byte in cnv->toUnicodeStatus);
+ *   UChars completed from such carried-over bytes get the offset -1.
+ * - A trail surrogate that does not fit into the target is stored in
+ *   cnv->UCharErrorBuffer and *pErrorCode is set to U_BUFFER_OVERFLOW_ERROR.
+ * - Sets *pErrorCode to U_ILLEGAL_CHAR_FOUND for an unmatched surrogate, with
+ *   its two bytes left in cnv->toUBytes[] and cnv->toULength==2.
+ * - The advanced source/target/offsets pointers are written back into pArgs,
+ *   except on the two early returns taken before any conversion work.
+ */
+static void  U_CALLCONV
+_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                             UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const uint8_t *source;
+    UChar *target;
+    int32_t *offsets;
+
+    uint32_t targetCapacity, length, count, sourceIndex;
+    UChar c, trail;
+
+    if(pArgs->converter->mode<8) {
+        _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
+        return;
+    }
+
+    cnv=pArgs->converter;
+    source=(const uint8_t *)pArgs->source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
+    if(length<=0 && cnv->toUnicodeStatus==0) {
+        /* no input, nothing to do
+         * NOTE(review): length is uint32_t, so length<=0 only matches
+         * length==0 - confirm that sourceLimit<source cannot happen. */
+        return;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    targetCapacity=(uint32_t)(pArgs->targetLimit-target);
+    offsets=pArgs->offsets;
+    sourceIndex=0;
+    c=0;
+
+    /* complete a partial UChar or pair from the last call */
+    if(cnv->toUnicodeStatus!=0) {
+        /*
+         * special case: single byte from a previous buffer,
+         * where the byte turned out not to belong to a trail surrogate
+         * and the preceding, unmatched lead surrogate was put into toUBytes[]
+         * for error handling
+         */
+        cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
+        cnv->toULength=1;
+        cnv->toUnicodeStatus=0;
+    }
+    if((count=cnv->toULength)!=0) {
+        uint8_t *p=cnv->toUBytes; /* bytes saved by an earlier call; new bytes are appended at p[count] */
+        do {
+            p[count++]=*source++;
+            ++sourceIndex;
+            --length;
+            if(count==2) {
+                c=((UChar)p[0]<<8)|p[1];
+                if(U16_IS_SINGLE(c)) {
+                    /* output the BMP code point */
+                    *target++=c;
+                    if(offsets!=NULL) {
+                        *offsets++=-1;
+                    }
+                    --targetCapacity;
+                    count=0;
+                    c=0;
+                    break;
+                } else if(U16_IS_SURROGATE_LEAD(c)) {
+                    /* continue collecting bytes for the trail surrogate */
+                    c=0; /* avoid unnecessary surrogate handling below */
+                } else {
+                    /* fall through to error handling for an unmatched trail surrogate */
+                    break;
+                }
+            } else if(count==4) {
+                c=((UChar)p[0]<<8)|p[1];
+                trail=((UChar)p[2]<<8)|p[3];
+                if(U16_IS_TRAIL(trail)) {
+                    /* output the surrogate pair */
+                    *target++=c;
+                    if(targetCapacity>=2) {
+                        *target++=trail;
+                        if(offsets!=NULL) {
+                            *offsets++=-1;
+                            *offsets++=-1;
+                        }
+                        targetCapacity-=2;
+                    } else /* targetCapacity==1 */ {
+                        targetCapacity=0;
+                        cnv->UCharErrorBuffer[0]=trail;
+                        cnv->UCharErrorBufferLength=1;
+                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                    }
+                    count=0;
+                    c=0;
+                    break;
+                } else {
+                    /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+
+                    /* back out reading the code unit after it
+                     * NOTE(review): source starts at pArgs->source and is only
+                     * advanced before this point, so (pArgs->source - source)
+                     * looks like it can never be >=2 and the else branch is
+                     * always taken; the operands may be swapped - confirm
+                     * against upstream before changing anything here. */
+                    if(((const uint8_t *)pArgs->source-source)>=2) {
+                        source-=2;
+                    } else {
+                        /*
+                         * if the trail unit's first byte was in a previous buffer, then
+                         * we need to put it into a special place because toUBytes[] will be
+                         * used for the lead unit's bytes
+                         */
+                        cnv->toUnicodeStatus=0x100|p[2];
+                        --source;
+                    }
+                    cnv->toULength=2;
+
+                    /* write back the updated pointers */
+                    pArgs->source=(const char *)source;
+                    pArgs->target=target;
+                    pArgs->offsets=offsets;
+                    return;
+                }
+            }
+        } while(length>0);
+        cnv->toULength=(int8_t)count;
+    }
+
+    /* copy an even number of bytes for complete UChars */
+    count=2*targetCapacity;
+    if(count>length) {
+        count=length&~1;
+    }
+    if(c==0 && count>0) {
+        length-=count;
+        count>>=1; /* from here on, count is a number of UChars, not bytes */
+        targetCapacity-=count;
+        if(offsets==NULL) {
+            do {
+                c=((UChar)source[0]<<8)|source[1];
+                source+=2;
+                if(U16_IS_SINGLE(c)) {
+                    *target++=c;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+                          U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
+                ) {
+                    source+=2;
+                    --count;
+                    *target++=c;
+                    *target++=trail;
+                } else {
+                    break;
+                }
+            } while(--count>0);
+        } else {
+            do {
+                c=((UChar)source[0]<<8)|source[1];
+                source+=2;
+                if(U16_IS_SINGLE(c)) {
+                    *target++=c;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=2;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+                          U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])
+                ) {
+                    source+=2;
+                    --count;
+                    *target++=c;
+                    *target++=trail;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=4;
+                } else {
+                    break;
+                }
+            } while(--count>0);
+        }
+
+        if(count==0) {
+            /* done with the loop for complete UChars */
+            c=0;
+        } else {
+            /* keep c for surrogate handling, trail will be set there */
+            length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+            targetCapacity+=count;
+        }
+    }
+
+    if(c!=0) {
+        /*
+         * c is a surrogate, and
+         * - source or target too short
+         * - or the surrogate is unmatched
+         */
+        cnv->toUBytes[0]=(uint8_t)(c>>8);
+        cnv->toUBytes[1]=(uint8_t)c;
+        cnv->toULength=2;
+
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(length>=2) {
+                if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) {
+                    /* output the surrogate pair, will overflow (see conditions comment above) */
+                    source+=2;
+                    length-=2;
+                    *target++=c;
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                    }
+                    cnv->UCharErrorBuffer[0]=trail;
+                    cnv->UCharErrorBufferLength=1;
+                    cnv->toULength=0;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                } else {
+                    /* unmatched lead surrogate */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                }
+            } else {
+                /* see if the trail surrogate is in the next buffer */
+            }
+        } else {
+            /* unmatched trail surrogate */
+            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+        }
+    }
+
+    if(U_SUCCESS(*pErrorCode)) {
+        /* check for a remaining source byte */
+        if(length>0) {
+            if(targetCapacity==0) {
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            } else {
+                /* it must be length==1 because otherwise the above would have copied more */
+                cnv->toUBytes[cnv->toULength++]=*source++;
+            }
+        }
+    }
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+}
+/*
+ * Reads one code point from the big-endian UTF-16 bytes at pArgs->source.
+ *
+ * Returns UCNV_GET_NEXT_UCHAR_USE_TO_U while pArgs->converter->mode<8.
+ * Otherwise returns the code point (a surrogate pair is combined with
+ * U16_GET_SUPPLEMENTARY) and advances pArgs->source past the bytes read.
+ * On failure returns 0xffff and sets *err:
+ * - U_INDEX_OUTOFBOUNDS_ERROR if there is no input (source is not moved);
+ * - U_TRUNCATED_CHAR_FOUND if the input ends inside a UChar or surrogate
+ *   pair, with the available bytes copied to converter->toUBytes[];
+ * - U_ILLEGAL_CHAR_FOUND for an unmatched surrogate, with its two bytes
+ *   copied to converter->toUBytes[].
+ */
+static UChar32  U_CALLCONV
+_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+    const uint8_t *s, *sourceLimit;
+    UChar32 c;
+
+    if(pArgs->converter->mode<8) {
+        return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+    }
+
+    s=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+
+    if(s>=sourceLimit) {
+        /* no input */
+        *err=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0xffff;
+    }
+
+    if(s+2>sourceLimit) {
+        /* only one byte: truncated UChar */
+        pArgs->converter->toUBytes[0]=*s++;
+        pArgs->converter->toULength=1;
+        pArgs->source=(const char *)s;
+        *err = U_TRUNCATED_CHAR_FOUND;
+        return 0xffff;
+    }
+
+    /* get one UChar */
+    c=((UChar32)*s<<8)|s[1];
+    s+=2;
+
+    /* check for a surrogate pair */
+    if(U_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(s+2<=sourceLimit) {
+                UChar trail;
+
+                /* get a second UChar and see if it is a trail surrogate */
+                trail=((UChar)*s<<8)|s[1];
+                if(U16_IS_TRAIL(trail)) {
+                    c=U16_GET_SUPPLEMENTARY(c, trail);
+                    s+=2;
+                } else {
+                    /* unmatched lead surrogate */
+                    c=-2; /* negative marker, picked up by the c<0 block below */
+                }
+            } else {
+                /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
+                uint8_t *bytes=pArgs->converter->toUBytes;
+                s-=2; /* go back to the lead surrogate so all remaining bytes are saved */
+                pArgs->converter->toULength=(int8_t)(sourceLimit-s);
+                do {
+                    *bytes++=*s++;
+                } while(s<sourceLimit);
+
+                c=0xffff;
+                *err=U_TRUNCATED_CHAR_FOUND;
+            }
+        } else {
+            /* unmatched trail surrogate */
+            c=-2; /* negative marker, picked up by the c<0 block below */
+        }
+
+        if(c<0) {
+            /* write the unmatched surrogate; s points just past its two bytes */
+            uint8_t *bytes=pArgs->converter->toUBytes;
+            pArgs->converter->toULength=2;
+            *bytes=*(s-2);
+            bytes[1]=*(s-1);
+
+            c=0xffff;
+            *err=U_ILLEGAL_CHAR_FOUND;
+        }
+    }
+
+    pArgs->source=(const char *)s;
+    return c;
 } 
- 
-static void  U_CALLCONV 
-_UTF16BEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 
-                             UErrorCode *pErrorCode) { 
-    UConverter *cnv; 
-    const uint8_t *source; 
-    UChar *target; 
-    int32_t *offsets; 
- 
-    uint32_t targetCapacity, length, count, sourceIndex; 
-    UChar c, trail; 
- 
-    if(pArgs->converter->mode<8) { 
-        _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 
-        return; 
-    } 
- 
-    cnv=pArgs->converter; 
-    source=(const uint8_t *)pArgs->source; 
-    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 
-    if(length<=0 && cnv->toUnicodeStatus==0) { 
-        /* no input, nothing to do */ 
-        return; 
-    } 
- 
-    target=pArgs->target; 
-    if(target >= pArgs->targetLimit) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-        return; 
-    } 
- 
-    targetCapacity=(uint32_t)(pArgs->targetLimit-target); 
-    offsets=pArgs->offsets; 
-    sourceIndex=0; 
-    c=0; 
- 
-    /* complete a partial UChar or pair from the last call */ 
-    if(cnv->toUnicodeStatus!=0) { 
-        /* 
-         * special case: single byte from a previous buffer, 
-         * where the byte turned out not to belong to a trail surrogate 
-         * and the preceding, unmatched lead surrogate was put into toUBytes[] 
-         * for error handling 
-         */ 
-        cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 
-        cnv->toULength=1; 
-        cnv->toUnicodeStatus=0; 
-    } 
-    if((count=cnv->toULength)!=0) { 
-        uint8_t *p=cnv->toUBytes; 
-        do { 
-            p[count++]=*source++; 
-            ++sourceIndex; 
-            --length; 
-            if(count==2) { 
-                c=((UChar)p[0]<<8)|p[1]; 
-                if(U16_IS_SINGLE(c)) { 
-                    /* output the BMP code point */ 
-                    *target++=c; 
-                    if(offsets!=NULL) { 
-                        *offsets++=-1; 
-                    } 
-                    --targetCapacity; 
-                    count=0; 
-                    c=0; 
-                    break; 
-                } else if(U16_IS_SURROGATE_LEAD(c)) { 
-                    /* continue collecting bytes for the trail surrogate */ 
-                    c=0; /* avoid unnecessary surrogate handling below */ 
-                } else { 
-                    /* fall through to error handling for an unmatched trail surrogate */ 
-                    break; 
-                } 
-            } else if(count==4) { 
-                c=((UChar)p[0]<<8)|p[1]; 
-                trail=((UChar)p[2]<<8)|p[3]; 
-                if(U16_IS_TRAIL(trail)) { 
-                    /* output the surrogate pair */ 
-                    *target++=c; 
-                    if(targetCapacity>=2) { 
-                        *target++=trail; 
-                        if(offsets!=NULL) { 
-                            *offsets++=-1; 
-                            *offsets++=-1; 
-                        } 
-                        targetCapacity-=2; 
-                    } else /* targetCapacity==1 */ { 
-                        targetCapacity=0; 
-                        cnv->UCharErrorBuffer[0]=trail; 
-                        cnv->UCharErrorBufferLength=1; 
-                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-                    } 
-                    count=0; 
-                    c=0; 
-                    break; 
-                } else { 
-                    /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
- 
-                    /* back out reading the code unit after it */ 
-                    if(((const uint8_t *)pArgs->source-source)>=2) { 
-                        source-=2; 
-                    } else { 
-                        /* 
-                         * if the trail unit's first byte was in a previous buffer, then 
-                         * we need to put it into a special place because toUBytes[] will be 
-                         * used for the lead unit's bytes 
-                         */ 
-                        cnv->toUnicodeStatus=0x100|p[2]; 
-                        --source; 
-                    } 
-                    cnv->toULength=2; 
- 
-                    /* write back the updated pointers */ 
-                    pArgs->source=(const char *)source; 
-                    pArgs->target=target; 
-                    pArgs->offsets=offsets; 
-                    return; 
-                } 
-            } 
-        } while(length>0); 
-        cnv->toULength=(int8_t)count; 
-    } 
- 
-    /* copy an even number of bytes for complete UChars */ 
-    count=2*targetCapacity; 
-    if(count>length) { 
-        count=length&~1; 
-    } 
-    if(c==0 && count>0) { 
-        length-=count; 
-        count>>=1; 
-        targetCapacity-=count; 
-        if(offsets==NULL) { 
-            do { 
-                c=((UChar)source[0]<<8)|source[1]; 
-                source+=2; 
-                if(U16_IS_SINGLE(c)) { 
-                    *target++=c; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 
-                          U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 
-                ) { 
-                    source+=2; 
-                    --count; 
-                    *target++=c; 
-                    *target++=trail; 
-                } else { 
-                    break; 
-                } 
-            } while(--count>0); 
-        } else { 
-            do { 
-                c=((UChar)source[0]<<8)|source[1]; 
-                source+=2; 
-                if(U16_IS_SINGLE(c)) { 
-                    *target++=c; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=2; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 
-                          U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1]) 
-                ) { 
-                    source+=2; 
-                    --count; 
-                    *target++=c; 
-                    *target++=trail; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=4; 
-                } else { 
-                    break; 
-                } 
-            } while(--count>0); 
-        } 
- 
-        if(count==0) { 
-            /* done with the loop for complete UChars */ 
-            c=0; 
-        } else { 
-            /* keep c for surrogate handling, trail will be set there */ 
-            length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 
-            targetCapacity+=count; 
-        } 
-    } 
- 
-    if(c!=0) { 
-        /* 
-         * c is a surrogate, and 
-         * - source or target too short 
-         * - or the surrogate is unmatched 
-         */ 
-        cnv->toUBytes[0]=(uint8_t)(c>>8); 
-        cnv->toUBytes[1]=(uint8_t)c; 
-        cnv->toULength=2; 
- 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(length>=2) { 
-                if(U16_IS_TRAIL(trail=((UChar)source[0]<<8)|source[1])) { 
-                    /* output the surrogate pair, will overflow (see conditions comment above) */ 
-                    source+=2; 
-                    length-=2; 
-                    *target++=c; 
-                    if(offsets!=NULL) { 
-                        *offsets++=sourceIndex; 
-                    } 
-                    cnv->UCharErrorBuffer[0]=trail; 
-                    cnv->UCharErrorBufferLength=1; 
-                    cnv->toULength=0; 
-                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-                } 
-            } else { 
-                /* see if the trail surrogate is in the next buffer */ 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-        } 
-    } 
- 
-    if(U_SUCCESS(*pErrorCode)) { 
-        /* check for a remaining source byte */ 
-        if(length>0) { 
-            if(targetCapacity==0) { 
-                *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-            } else { 
-                /* it must be length==1 because otherwise the above would have copied more */ 
-                cnv->toUBytes[cnv->toULength++]=*source++; 
-            } 
-        } 
-    } 
- 
-    /* write back the updated pointers */ 
-    pArgs->source=(const char *)source; 
-    pArgs->target=target; 
-    pArgs->offsets=offsets; 
+
+static void  U_CALLCONV
+_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) {
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        /* reset toUnicode state */
+        if(UCNV_GET_VERSION(cnv)==0) {
+            cnv->mode=8; /* no BOM handling */
+        } else {
+            cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */
+        }
+    }
+    if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
+        /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
+    }
+}
+
+static void  U_CALLCONV
+_UTF16BEOpen(UConverter *cnv,
+             UConverterLoadArgs *pArgs,
+             UErrorCode *pErrorCode) {
+    (void)pArgs;
+    if(UCNV_GET_VERSION(cnv)<=1) {
+        _UTF16BEReset(cnv, UCNV_RESET_BOTH);
+    } else {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+static const char *  U_CALLCONV
+_UTF16BEGetName(const UConverter *cnv) {
+    if(UCNV_GET_VERSION(cnv)==0) {
+        return "UTF-16BE";
+    } else {
+        return "UTF-16BE,version=1";
+    }
+}
+U_CDECL_END
+
+static const UConverterImpl _UTF16BEImpl={
+    UCNV_UTF16_BigEndian,
+
+    NULL,
+    NULL,
+
+    _UTF16BEOpen,
+    NULL,
+    _UTF16BEReset,
+
+    _UTF16BEToUnicodeWithOffsets,
+    _UTF16BEToUnicodeWithOffsets,
+    _UTF16BEFromUnicodeWithOffsets,
+    _UTF16BEFromUnicodeWithOffsets,
+    _UTF16BEGetNextUChar,
+
+    NULL,
+    _UTF16BEGetName,
+    NULL,
+    NULL,
+    ucnv_getNonSurrogateUnicodeSet,
+
+    NULL,
+    NULL
+};
+
+static const UConverterStaticData _UTF16BEStaticData={
+    sizeof(UConverterStaticData),
+    "UTF-16BE",
+    1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2,
+    { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+const UConverterSharedData _UTF16BEData=
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl);
+
+/* UTF-16LE ----------------------------------------------------------------- */
+U_CDECL_BEGIN
+static void  U_CALLCONV
+_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
+                               UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const UChar *source;
+    char *target;
+    int32_t *offsets;
+
+    uint32_t targetCapacity, length, sourceIndex;
+    UChar c, trail;
+    char overflow[4];
+
+    source=pArgs->source;
+    length=(int32_t)(pArgs->sourceLimit-source);
+    if(length<=0) {
+        /* no input, nothing to do */
+        return;
+    }
+
+    cnv=pArgs->converter;
+
+    /* write the BOM if necessary */
+    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) {
+        static const char bom[]={ (char)0xffu, (char)0xfeu };
+        ucnv_fromUWriteBytes(cnv,
+                             bom, 2,
+                             &pArgs->target, pArgs->targetLimit,
+                             &pArgs->offsets, -1,
+                             pErrorCode);
+        cnv->fromUnicodeStatus=0;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
+    offsets=pArgs->offsets;
+    sourceIndex=0;
+
+    /* c!=0 indicates in several places outside the main loops that a surrogate was found */
+
+    if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) {
+        /* the last buffer ended with a lead surrogate, output the surrogate pair */
+        ++source;
+        --length;
+        target[0]=(uint8_t)c;
+        target[1]=(uint8_t)(c>>8);
+        target[2]=(uint8_t)trail;
+        target[3]=(uint8_t)(trail>>8);
+        target+=4;
+        targetCapacity-=4;
+        if(offsets!=NULL) {
+            *offsets++=-1;
+            *offsets++=-1;
+            *offsets++=-1;
+            *offsets++=-1;
+        }
+        sourceIndex=1;
+        cnv->fromUChar32=c=0;
+    }
+
+    if(c==0) {
+        /* copy an even number of bytes for complete UChars */
+        uint32_t count=2*length;
+        if(count>targetCapacity) {
+            count=targetCapacity&~1;
+        }
+        /* count is even */
+        targetCapacity-=count;
+        count>>=1;
+        length-=count;
+
+        if(offsets==NULL) {
+            while(count>0) {
+                c=*source++;
+                if(U16_IS_SINGLE(c)) {
+                    target[0]=(uint8_t)c;
+                    target[1]=(uint8_t)(c>>8);
+                    target+=2;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+                    ++source;
+                    --count;
+                    target[0]=(uint8_t)c;
+                    target[1]=(uint8_t)(c>>8);
+                    target[2]=(uint8_t)trail;
+                    target[3]=(uint8_t)(trail>>8);
+                    target+=4;
+                } else {
+                    break;
+                }
+                --count;
+            }
+        } else {
+            while(count>0) {
+                c=*source++;
+                if(U16_IS_SINGLE(c)) {
+                    target[0]=(uint8_t)c;
+                    target[1]=(uint8_t)(c>>8);
+                    target+=2;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex++;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) {
+                    ++source;
+                    --count;
+                    target[0]=(uint8_t)c;
+                    target[1]=(uint8_t)(c>>8);
+                    target[2]=(uint8_t)trail;
+                    target[3]=(uint8_t)(trail>>8);
+                    target+=4;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=2;
+                } else {
+                    break;
+                }
+                --count;
+            }
+        }
+
+        if(count==0) {
+            /* done with the loop for complete UChars */
+            if(length>0 && targetCapacity>0) {
+                /*
+                 * there is more input and some target capacity -
+                 * it must be targetCapacity==1 because otherwise
+                 * the above would have copied more;
+                 * prepare for overflow output
+                 */
+                if(U16_IS_SINGLE(c=*source++)) {
+                    overflow[0]=(char)c;
+                    overflow[1]=(char)(c>>8);
+                    length=2; /* 2 bytes to output */
+                    c=0;
+                /* } else { keep c for surrogate handling, length will be set there */
+                }
+            } else {
+                length=0;
+                c=0;
+            }
+        } else {
+            /* keep c for surrogate handling, length will be set there */
+            targetCapacity+=2*count;
+        }
+    } else {
+        length=0; /* from here on, length counts the bytes in overflow[] */
+    }
+    
+    if(c!=0) {
+        /*
+         * c is a surrogate, and
+         * - source or target too short
+         * - or the surrogate is unmatched
+         */
+        length=0;
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(source<pArgs->sourceLimit) {
+                if(U16_IS_TRAIL(trail=*source)) {
+                    /* output the surrogate pair, will overflow (see conditions comment above) */
+                    ++source;
+                    overflow[0]=(char)c;
+                    overflow[1]=(char)(c>>8);
+                    overflow[2]=(char)trail;
+                    overflow[3]=(char)(trail>>8);
+                    length=4; /* 4 bytes to output */
+                    c=0;
+                } else {
+                    /* unmatched lead surrogate */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                }
+            } else {
+                /* see if the trail surrogate is in the next buffer */
+            }
+        } else {
+            /* unmatched trail surrogate */
+            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+        }
+        cnv->fromUChar32=c;
+    }
+
+    if(length>0) {
+        /* output length bytes with overflow (length>targetCapacity>0) */
+        ucnv_fromUWriteBytes(cnv,
+                             overflow, length,
+                             &target, pArgs->targetLimit,
+                             &offsets, sourceIndex,
+                             pErrorCode);
+        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target);
+    }
+
+    if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+    }
+
+    /* write back the updated pointers */
+    pArgs->source=source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+}
+
+static void  U_CALLCONV
+_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                             UErrorCode *pErrorCode) {
+    UConverter *cnv;
+    const uint8_t *source;
+    UChar *target;
+    int32_t *offsets;
+
+    uint32_t targetCapacity, length, count, sourceIndex;
+    UChar c, trail;
+
+    if(pArgs->converter->mode<8) {
+        _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode);
+        return;
+    }
+
+    cnv=pArgs->converter;
+    source=(const uint8_t *)pArgs->source;
+    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
+    if(length<=0 && cnv->toUnicodeStatus==0) {
+        /* no input, nothing to do */
+        return;
+    }
+
+    target=pArgs->target;
+    if(target >= pArgs->targetLimit) {
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target);
+    offsets=pArgs->offsets;
+    sourceIndex=0;
+    c=0;
+
+    /* complete a partial UChar or pair from the last call */
+    if(cnv->toUnicodeStatus!=0) {
+        /*
+         * special case: single byte from a previous buffer,
+         * where the byte turned out not to belong to a trail surrogate
+         * and the preceding, unmatched lead surrogate was put into toUBytes[]
+         * for error handling
+         */
+        cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus;
+        cnv->toULength=1;
+        cnv->toUnicodeStatus=0;
+    }
+    if((count=cnv->toULength)!=0) {
+        uint8_t *p=cnv->toUBytes;
+        do {
+            p[count++]=*source++;
+            ++sourceIndex;
+            --length;
+            if(count==2) {
+                c=((UChar)p[1]<<8)|p[0];
+                if(U16_IS_SINGLE(c)) {
+                    /* output the BMP code point */
+                    *target++=c;
+                    if(offsets!=NULL) {
+                        *offsets++=-1;
+                    }
+                    --targetCapacity;
+                    count=0;
+                    c=0;
+                    break;
+                } else if(U16_IS_SURROGATE_LEAD(c)) {
+                    /* continue collecting bytes for the trail surrogate */
+                    c=0; /* avoid unnecessary surrogate handling below */
+                } else {
+                    /* fall through to error handling for an unmatched trail surrogate */
+                    break;
+                }
+            } else if(count==4) {
+                c=((UChar)p[1]<<8)|p[0];
+                trail=((UChar)p[3]<<8)|p[2];
+                if(U16_IS_TRAIL(trail)) {
+                    /* output the surrogate pair */
+                    *target++=c;
+                    if(targetCapacity>=2) {
+                        *target++=trail;
+                        if(offsets!=NULL) {
+                            *offsets++=-1;
+                            *offsets++=-1;
+                        }
+                        targetCapacity-=2;
+                    } else /* targetCapacity==1 */ {
+                        targetCapacity=0;
+                        cnv->UCharErrorBuffer[0]=trail;
+                        cnv->UCharErrorBufferLength=1;
+                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                    }
+                    count=0;
+                    c=0;
+                    break;
+                } else {
+                    /* unmatched lead surrogate, handle here for consistent toUBytes[] */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+
+                    /* back out reading the code unit after it */
+                    if(((const uint8_t *)pArgs->source-source)>=2) {
+                        source-=2;
+                    } else {
+                        /*
+                         * if the trail unit's first byte was in a previous buffer, then
+                         * we need to put it into a special place because toUBytes[] will be
+                         * used for the lead unit's bytes
+                         */
+                        cnv->toUnicodeStatus=0x100|p[2];
+                        --source;
+                    }
+                    cnv->toULength=2;
+
+                    /* write back the updated pointers */
+                    pArgs->source=(const char *)source;
+                    pArgs->target=target;
+                    pArgs->offsets=offsets;
+                    return;
+                }
+            }
+        } while(length>0);
+        cnv->toULength=(int8_t)count;
+    }
+
+    /* copy an even number of bytes for complete UChars */
+    count=2*targetCapacity;
+    if(count>length) {
+        count=length&~1;
+    }
+    if(c==0 && count>0) {
+        length-=count;
+        count>>=1;
+        targetCapacity-=count;
+        if(offsets==NULL) {
+            do {
+                c=((UChar)source[1]<<8)|source[0];
+                source+=2;
+                if(U16_IS_SINGLE(c)) {
+                    *target++=c;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+                          U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
+                ) {
+                    source+=2;
+                    --count;
+                    *target++=c;
+                    *target++=trail;
+                } else {
+                    break;
+                }
+            } while(--count>0);
+        } else {
+            do {
+                c=((UChar)source[1]<<8)|source[0];
+                source+=2;
+                if(U16_IS_SINGLE(c)) {
+                    *target++=c;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=2;
+                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 &&
+                          U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])
+                ) {
+                    source+=2;
+                    --count;
+                    *target++=c;
+                    *target++=trail;
+                    *offsets++=sourceIndex;
+                    *offsets++=sourceIndex;
+                    sourceIndex+=4;
+                } else {
+                    break;
+                }
+            } while(--count>0);
+        }
+
+        if(count==0) {
+            /* done with the loop for complete UChars */
+            c=0;
+        } else {
+            /* keep c for surrogate handling, trail will be set there */
+            length+=2*(count-1); /* one more byte pair was consumed than count decremented */
+            targetCapacity+=count;
+        }
+    }
+
+    if(c!=0) {
+        /*
+         * c is a surrogate, and
+         * - source or target too short
+         * - or the surrogate is unmatched
+         */
+        cnv->toUBytes[0]=(uint8_t)c;
+        cnv->toUBytes[1]=(uint8_t)(c>>8);
+        cnv->toULength=2;
+
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(length>=2) {
+                if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) {
+                    /* output the surrogate pair, will overflow (see conditions comment above) */
+                    source+=2;
+                    length-=2;
+                    *target++=c;
+                    if(offsets!=NULL) {
+                        *offsets++=sourceIndex;
+                    }
+                    cnv->UCharErrorBuffer[0]=trail;
+                    cnv->UCharErrorBufferLength=1;
+                    cnv->toULength=0;
+                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+                } else {
+                    /* unmatched lead surrogate */
+                    *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+                }
+            } else {
+                /* see if the trail surrogate is in the next buffer */
+            }
+        } else {
+            /* unmatched trail surrogate */
+            *pErrorCode=U_ILLEGAL_CHAR_FOUND;
+        }
+    }
+
+    if(U_SUCCESS(*pErrorCode)) {
+        /* check for a remaining source byte */
+        if(length>0) {
+            if(targetCapacity==0) {
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            } else {
+                /* it must be length==1 because otherwise the above would have copied more */
+                cnv->toUBytes[cnv->toULength++]=*source++;
+            }
+        }
+    }
+
+    /* write back the updated pointers */
+    pArgs->source=(const char *)source;
+    pArgs->target=target;
+    pArgs->offsets=offsets;
+}
+
+static UChar32  U_CALLCONV
+_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) {
+    const uint8_t *s, *sourceLimit;
+    UChar32 c;
+
+    if(pArgs->converter->mode<8) {
+        return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+    }
+
+    s=(const uint8_t *)pArgs->source;
+    sourceLimit=(const uint8_t *)pArgs->sourceLimit;
+
+    if(s>=sourceLimit) {
+        /* no input */
+        *err=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0xffff;
+    }
+
+    if(s+2>sourceLimit) {
+        /* only one byte: truncated UChar */
+        pArgs->converter->toUBytes[0]=*s++;
+        pArgs->converter->toULength=1;
+        pArgs->source=(const char *)s;
+        *err = U_TRUNCATED_CHAR_FOUND;
+        return 0xffff;
+    }
+
+    /* get one UChar */
+    c=((UChar32)s[1]<<8)|*s;
+    s+=2;
+
+    /* check for a surrogate pair */
+    if(U_IS_SURROGATE(c)) {
+        if(U16_IS_SURROGATE_LEAD(c)) {
+            if(s+2<=sourceLimit) {
+                UChar trail;
+
+                /* get a second UChar and see if it is a trail surrogate */
+                trail=((UChar)s[1]<<8)|*s;
+                if(U16_IS_TRAIL(trail)) {
+                    c=U16_GET_SUPPLEMENTARY(c, trail);
+                    s+=2;
+                } else {
+                    /* unmatched lead surrogate */
+                    c=-2;
+                }
+            } else {
+                /* too few (2 or 3) bytes for a surrogate pair: truncated code point */
+                uint8_t *bytes=pArgs->converter->toUBytes;
+                s-=2;
+                pArgs->converter->toULength=(int8_t)(sourceLimit-s);
+                do {
+                    *bytes++=*s++;
+                } while(s<sourceLimit);
+
+                c=0xffff;
+                *err=U_TRUNCATED_CHAR_FOUND;
+            }
+        } else {
+            /* unmatched trail surrogate */
+            c=-2;
+        }
+
+        if(c<0) {
+            /* write the unmatched surrogate */
+            uint8_t *bytes=pArgs->converter->toUBytes;
+            pArgs->converter->toULength=2;
+            *bytes=*(s-2);
+            bytes[1]=*(s-1);
+
+            c=0xffff;
+            *err=U_ILLEGAL_CHAR_FOUND;
+        }
+    }
+
+    pArgs->source=(const char *)s;
+    return c;
 } 
- 
-static UChar32  U_CALLCONV 
-_UTF16BEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 
-    const uint8_t *s, *sourceLimit; 
-    UChar32 c; 
- 
-    if(pArgs->converter->mode<8) { 
-        return UCNV_GET_NEXT_UCHAR_USE_TO_U; 
-    } 
- 
-    s=(const uint8_t *)pArgs->source; 
-    sourceLimit=(const uint8_t *)pArgs->sourceLimit; 
- 
-    if(s>=sourceLimit) { 
-        /* no input */ 
-        *err=U_INDEX_OUTOFBOUNDS_ERROR; 
-        return 0xffff; 
-    } 
- 
-    if(s+2>sourceLimit) { 
-        /* only one byte: truncated UChar */ 
-        pArgs->converter->toUBytes[0]=*s++; 
-        pArgs->converter->toULength=1; 
-        pArgs->source=(const char *)s; 
-        *err = U_TRUNCATED_CHAR_FOUND; 
-        return 0xffff; 
-    } 
- 
-    /* get one UChar */ 
-    c=((UChar32)*s<<8)|s[1]; 
-    s+=2; 
- 
-    /* check for a surrogate pair */ 
-    if(U_IS_SURROGATE(c)) { 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(s+2<=sourceLimit) { 
-                UChar trail; 
- 
-                /* get a second UChar and see if it is a trail surrogate */ 
-                trail=((UChar)*s<<8)|s[1]; 
-                if(U16_IS_TRAIL(trail)) { 
-                    c=U16_GET_SUPPLEMENTARY(c, trail); 
-                    s+=2; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    c=-2; 
-                } 
-            } else { 
-                /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ 
-                uint8_t *bytes=pArgs->converter->toUBytes; 
-                s-=2; 
-                pArgs->converter->toULength=(int8_t)(sourceLimit-s); 
-                do { 
-                    *bytes++=*s++; 
-                } while(s<sourceLimit); 
- 
-                c=0xffff; 
-                *err=U_TRUNCATED_CHAR_FOUND; 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            c=-2; 
-        } 
- 
-        if(c<0) { 
-            /* write the unmatched surrogate */ 
-            uint8_t *bytes=pArgs->converter->toUBytes; 
-            pArgs->converter->toULength=2; 
-            *bytes=*(s-2); 
-            bytes[1]=*(s-1); 
- 
-            c=0xffff; 
-            *err=U_ILLEGAL_CHAR_FOUND; 
-        } 
-    } 
- 
-    pArgs->source=(const char *)s; 
-    return c; 
-}  
- 
-static void  U_CALLCONV 
-_UTF16BEReset(UConverter *cnv, UConverterResetChoice choice) { 
-    if(choice<=UCNV_RESET_TO_UNICODE) { 
-        /* reset toUnicode state */ 
-        if(UCNV_GET_VERSION(cnv)==0) { 
-            cnv->mode=8; /* no BOM handling */ 
-        } else { 
-            cnv->mode=0; /* Java-specific "UnicodeBig" requires BE BOM or no BOM */ 
-        } 
-    } 
-    if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 
-        /* reset fromUnicode for "UnicodeBig": prepare to output the UTF-16BE BOM */ 
-        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 
-    } 
-} 
- 
-static void  U_CALLCONV 
-_UTF16BEOpen(UConverter *cnv, 
-             UConverterLoadArgs *pArgs, 
-             UErrorCode *pErrorCode) { 
-    (void)pArgs; 
-    if(UCNV_GET_VERSION(cnv)<=1) { 
-        _UTF16BEReset(cnv, UCNV_RESET_BOTH); 
-    } else { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-    } 
-} 
- 
-static const char *  U_CALLCONV 
-_UTF16BEGetName(const UConverter *cnv) { 
-    if(UCNV_GET_VERSION(cnv)==0) { 
-        return "UTF-16BE"; 
-    } else { 
-        return "UTF-16BE,version=1"; 
-    } 
-} 
-U_CDECL_END 
- 
-static const UConverterImpl _UTF16BEImpl={ 
-    UCNV_UTF16_BigEndian, 
- 
-    NULL, 
-    NULL, 
- 
-    _UTF16BEOpen, 
-    NULL, 
-    _UTF16BEReset, 
- 
-    _UTF16BEToUnicodeWithOffsets, 
-    _UTF16BEToUnicodeWithOffsets, 
-    _UTF16BEFromUnicodeWithOffsets, 
-    _UTF16BEFromUnicodeWithOffsets, 
-    _UTF16BEGetNextUChar, 
- 
-    NULL, 
-    _UTF16BEGetName, 
-    NULL, 
-    NULL, 
-    ucnv_getNonSurrogateUnicodeSet, 
- 
-    NULL, 
-    NULL 
-}; 
- 
-static const UConverterStaticData _UTF16BEStaticData={ 
-    sizeof(UConverterStaticData), 
-    "UTF-16BE", 
-    1200, UCNV_IBM, UCNV_UTF16_BigEndian, 2, 2, 
-    { 0xff, 0xfd, 0, 0 },2,FALSE,FALSE, 
-    0, 
-    0, 
-    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
-}; 
- 
- 
-const UConverterSharedData _UTF16BEData= 
-        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16BEStaticData, &_UTF16BEImpl); 
- 
-/* UTF-16LE ----------------------------------------------------------------- */ 
-U_CDECL_BEGIN 
-static void  U_CALLCONV 
-_UTF16LEFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs, 
-                               UErrorCode *pErrorCode) { 
-    UConverter *cnv; 
-    const UChar *source; 
-    char *target; 
-    int32_t *offsets; 
- 
-    uint32_t targetCapacity, length, sourceIndex; 
-    UChar c, trail; 
-    char overflow[4]; 
- 
-    source=pArgs->source; 
-    length=(int32_t)(pArgs->sourceLimit-source); 
-    if(length<=0) { 
-        /* no input, nothing to do */ 
-        return; 
-    } 
- 
-    cnv=pArgs->converter; 
- 
-    /* write the BOM if necessary */ 
-    if(cnv->fromUnicodeStatus==UCNV_NEED_TO_WRITE_BOM) { 
-        static const char bom[]={ (char)0xffu, (char)0xfeu }; 
-        ucnv_fromUWriteBytes(cnv, 
-                             bom, 2, 
-                             &pArgs->target, pArgs->targetLimit, 
-                             &pArgs->offsets, -1, 
-                             pErrorCode); 
-        cnv->fromUnicodeStatus=0; 
-    } 
- 
-    target=pArgs->target; 
-    if(target >= pArgs->targetLimit) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-        return; 
-    } 
- 
-    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 
-    offsets=pArgs->offsets; 
-    sourceIndex=0; 
- 
-    /* c!=0 indicates in several places outside the main loops that a surrogate was found */ 
- 
-    if((c=(UChar)cnv->fromUChar32)!=0 && U16_IS_TRAIL(trail=*source) && targetCapacity>=4) { 
-        /* the last buffer ended with a lead surrogate, output the surrogate pair */ 
-        ++source; 
-        --length; 
-        target[0]=(uint8_t)c; 
-        target[1]=(uint8_t)(c>>8); 
-        target[2]=(uint8_t)trail; 
-        target[3]=(uint8_t)(trail>>8); 
-        target+=4; 
-        targetCapacity-=4; 
-        if(offsets!=NULL) { 
-            *offsets++=-1; 
-            *offsets++=-1; 
-            *offsets++=-1; 
-            *offsets++=-1; 
-        } 
-        sourceIndex=1; 
-        cnv->fromUChar32=c=0; 
-    } 
- 
-    if(c==0) { 
-        /* copy an even number of bytes for complete UChars */ 
-        uint32_t count=2*length; 
-        if(count>targetCapacity) { 
-            count=targetCapacity&~1; 
-        } 
-        /* count is even */ 
-        targetCapacity-=count; 
-        count>>=1; 
-        length-=count; 
- 
-        if(offsets==NULL) { 
-            while(count>0) { 
-                c=*source++; 
-                if(U16_IS_SINGLE(c)) { 
-                    target[0]=(uint8_t)c; 
-                    target[1]=(uint8_t)(c>>8); 
-                    target+=2; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 
-                    ++source; 
-                    --count; 
-                    target[0]=(uint8_t)c; 
-                    target[1]=(uint8_t)(c>>8); 
-                    target[2]=(uint8_t)trail; 
-                    target[3]=(uint8_t)(trail>>8); 
-                    target+=4; 
-                } else { 
-                    break; 
-                } 
-                --count; 
-            } 
-        } else { 
-            while(count>0) { 
-                c=*source++; 
-                if(U16_IS_SINGLE(c)) { 
-                    target[0]=(uint8_t)c; 
-                    target[1]=(uint8_t)(c>>8); 
-                    target+=2; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex++; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && U16_IS_TRAIL(trail=*source)) { 
-                    ++source; 
-                    --count; 
-                    target[0]=(uint8_t)c; 
-                    target[1]=(uint8_t)(c>>8); 
-                    target[2]=(uint8_t)trail; 
-                    target[3]=(uint8_t)(trail>>8); 
-                    target+=4; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=2; 
-                } else { 
-                    break; 
-                } 
-                --count; 
-            } 
-        } 
- 
-        if(count==0) { 
-            /* done with the loop for complete UChars */ 
-            if(length>0 && targetCapacity>0) { 
-                /* 
-                 * there is more input and some target capacity - 
-                 * it must be targetCapacity==1 because otherwise 
-                 * the above would have copied more; 
-                 * prepare for overflow output 
-                 */ 
-                if(U16_IS_SINGLE(c=*source++)) { 
-                    overflow[0]=(char)c; 
-                    overflow[1]=(char)(c>>8); 
-                    length=2; /* 2 bytes to output */ 
-                    c=0; 
-                /* } else { keep c for surrogate handling, length will be set there */ 
-                } 
-            } else { 
-                length=0; 
-                c=0; 
-            } 
-        } else { 
-            /* keep c for surrogate handling, length will be set there */ 
-            targetCapacity+=2*count; 
-        } 
-    } else { 
-        length=0; /* from here on, length counts the bytes in overflow[] */ 
-    } 
-     
-    if(c!=0) { 
-        /* 
-         * c is a surrogate, and 
-         * - source or target too short 
-         * - or the surrogate is unmatched 
-         */ 
-        length=0; 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(source<pArgs->sourceLimit) { 
-                if(U16_IS_TRAIL(trail=*source)) { 
-                    /* output the surrogate pair, will overflow (see conditions comment above) */ 
-                    ++source; 
-                    overflow[0]=(char)c; 
-                    overflow[1]=(char)(c>>8); 
-                    overflow[2]=(char)trail; 
-                    overflow[3]=(char)(trail>>8); 
-                    length=4; /* 4 bytes to output */ 
-                    c=0; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-                } 
-            } else { 
-                /* see if the trail surrogate is in the next buffer */ 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-        } 
-        cnv->fromUChar32=c; 
-    } 
- 
-    if(length>0) { 
-        /* output length bytes with overflow (length>targetCapacity>0) */ 
-        ucnv_fromUWriteBytes(cnv, 
-                             overflow, length, 
-                             &target, pArgs->targetLimit, 
-                             &offsets, sourceIndex, 
-                             pErrorCode); 
-        targetCapacity=(uint32_t)(pArgs->targetLimit-(char *)target); 
-    } 
- 
-    if(U_SUCCESS(*pErrorCode) && source<pArgs->sourceLimit && targetCapacity==0) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-    } 
- 
-    /* write back the updated pointers */ 
-    pArgs->source=source; 
-    pArgs->target=target; 
-    pArgs->offsets=offsets; 
-} 
- 
-static void  U_CALLCONV 
-_UTF16LEToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 
-                             UErrorCode *pErrorCode) { 
-    UConverter *cnv; 
-    const uint8_t *source; 
-    UChar *target; 
-    int32_t *offsets; 
- 
-    uint32_t targetCapacity, length, count, sourceIndex; 
-    UChar c, trail; 
- 
-    if(pArgs->converter->mode<8) { 
-        _UTF16ToUnicodeWithOffsets(pArgs, pErrorCode); 
-        return; 
-    } 
- 
-    cnv=pArgs->converter; 
-    source=(const uint8_t *)pArgs->source; 
-    length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source); 
-    if(length<=0 && cnv->toUnicodeStatus==0) { 
-        /* no input, nothing to do */ 
-        return; 
-    } 
- 
-    target=pArgs->target; 
-    if(target >= pArgs->targetLimit) { 
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-        return; 
-    } 
- 
-    targetCapacity=(uint32_t)(pArgs->targetLimit-pArgs->target); 
-    offsets=pArgs->offsets; 
-    sourceIndex=0; 
-    c=0; 
- 
-    /* complete a partial UChar or pair from the last call */ 
-    if(cnv->toUnicodeStatus!=0) { 
-        /* 
-         * special case: single byte from a previous buffer, 
-         * where the byte turned out not to belong to a trail surrogate 
-         * and the preceding, unmatched lead surrogate was put into toUBytes[] 
-         * for error handling 
-         */ 
-        cnv->toUBytes[0]=(uint8_t)cnv->toUnicodeStatus; 
-        cnv->toULength=1; 
-        cnv->toUnicodeStatus=0; 
-    } 
-    if((count=cnv->toULength)!=0) { 
-        uint8_t *p=cnv->toUBytes; 
-        do { 
-            p[count++]=*source++; 
-            ++sourceIndex; 
-            --length; 
-            if(count==2) { 
-                c=((UChar)p[1]<<8)|p[0]; 
-                if(U16_IS_SINGLE(c)) { 
-                    /* output the BMP code point */ 
-                    *target++=c; 
-                    if(offsets!=NULL) { 
-                        *offsets++=-1; 
-                    } 
-                    --targetCapacity; 
-                    count=0; 
-                    c=0; 
-                    break; 
-                } else if(U16_IS_SURROGATE_LEAD(c)) { 
-                    /* continue collecting bytes for the trail surrogate */ 
-                    c=0; /* avoid unnecessary surrogate handling below */ 
-                } else { 
-                    /* fall through to error handling for an unmatched trail surrogate */ 
-                    break; 
-                } 
-            } else if(count==4) { 
-                c=((UChar)p[1]<<8)|p[0]; 
-                trail=((UChar)p[3]<<8)|p[2]; 
-                if(U16_IS_TRAIL(trail)) { 
-                    /* output the surrogate pair */ 
-                    *target++=c; 
-                    if(targetCapacity>=2) { 
-                        *target++=trail; 
-                        if(offsets!=NULL) { 
-                            *offsets++=-1; 
-                            *offsets++=-1; 
-                        } 
-                        targetCapacity-=2; 
-                    } else /* targetCapacity==1 */ { 
-                        targetCapacity=0; 
-                        cnv->UCharErrorBuffer[0]=trail; 
-                        cnv->UCharErrorBufferLength=1; 
-                        *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-                    } 
-                    count=0; 
-                    c=0; 
-                    break; 
-                } else { 
-                    /* unmatched lead surrogate, handle here for consistent toUBytes[] */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
- 
-                    /* back out reading the code unit after it */ 
-                    if(((const uint8_t *)pArgs->source-source)>=2) { 
-                        source-=2; 
-                    } else { 
-                        /* 
-                         * if the trail unit's first byte was in a previous buffer, then 
-                         * we need to put it into a special place because toUBytes[] will be 
-                         * used for the lead unit's bytes 
-                         */ 
-                        cnv->toUnicodeStatus=0x100|p[2]; 
-                        --source; 
-                    } 
-                    cnv->toULength=2; 
- 
-                    /* write back the updated pointers */ 
-                    pArgs->source=(const char *)source; 
-                    pArgs->target=target; 
-                    pArgs->offsets=offsets; 
-                    return; 
-                } 
-            } 
-        } while(length>0); 
-        cnv->toULength=(int8_t)count; 
-    } 
- 
-    /* copy an even number of bytes for complete UChars */ 
-    count=2*targetCapacity; 
-    if(count>length) { 
-        count=length&~1; 
-    } 
-    if(c==0 && count>0) { 
-        length-=count; 
-        count>>=1; 
-        targetCapacity-=count; 
-        if(offsets==NULL) { 
-            do { 
-                c=((UChar)source[1]<<8)|source[0]; 
-                source+=2; 
-                if(U16_IS_SINGLE(c)) { 
-                    *target++=c; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 
-                          U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 
-                ) { 
-                    source+=2; 
-                    --count; 
-                    *target++=c; 
-                    *target++=trail; 
-                } else { 
-                    break; 
-                } 
-            } while(--count>0); 
-        } else { 
-            do { 
-                c=((UChar)source[1]<<8)|source[0]; 
-                source+=2; 
-                if(U16_IS_SINGLE(c)) { 
-                    *target++=c; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=2; 
-                } else if(U16_IS_SURROGATE_LEAD(c) && count>=2 && 
-                          U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0]) 
-                ) { 
-                    source+=2; 
-                    --count; 
-                    *target++=c; 
-                    *target++=trail; 
-                    *offsets++=sourceIndex; 
-                    *offsets++=sourceIndex; 
-                    sourceIndex+=4; 
-                } else { 
-                    break; 
-                } 
-            } while(--count>0); 
-        } 
- 
-        if(count==0) { 
-            /* done with the loop for complete UChars */ 
-            c=0; 
-        } else { 
-            /* keep c for surrogate handling, trail will be set there */ 
-            length+=2*(count-1); /* one more byte pair was consumed than count decremented */ 
-            targetCapacity+=count; 
-        } 
-    } 
- 
-    if(c!=0) { 
-        /* 
-         * c is a surrogate, and 
-         * - source or target too short 
-         * - or the surrogate is unmatched 
-         */ 
-        cnv->toUBytes[0]=(uint8_t)c; 
-        cnv->toUBytes[1]=(uint8_t)(c>>8); 
-        cnv->toULength=2; 
- 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(length>=2) { 
-                if(U16_IS_TRAIL(trail=((UChar)source[1]<<8)|source[0])) { 
-                    /* output the surrogate pair, will overflow (see conditions comment above) */ 
-                    source+=2; 
-                    length-=2; 
-                    *target++=c; 
-                    if(offsets!=NULL) { 
-                        *offsets++=sourceIndex; 
-                    } 
-                    cnv->UCharErrorBuffer[0]=trail; 
-                    cnv->UCharErrorBufferLength=1; 
-                    cnv->toULength=0; 
-                    *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-                } 
-            } else { 
-                /* see if the trail surrogate is in the next buffer */ 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            *pErrorCode=U_ILLEGAL_CHAR_FOUND; 
-        } 
-    } 
- 
-    if(U_SUCCESS(*pErrorCode)) { 
-        /* check for a remaining source byte */ 
-        if(length>0) { 
-            if(targetCapacity==0) { 
-                *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 
-            } else { 
-                /* it must be length==1 because otherwise the above would have copied more */ 
-                cnv->toUBytes[cnv->toULength++]=*source++; 
-            } 
-        } 
-    } 
- 
-    /* write back the updated pointers */ 
-    pArgs->source=(const char *)source; 
-    pArgs->target=target; 
-    pArgs->offsets=offsets; 
-} 
- 
-static UChar32  U_CALLCONV 
-_UTF16LEGetNextUChar(UConverterToUnicodeArgs *pArgs, UErrorCode *err) { 
-    const uint8_t *s, *sourceLimit; 
-    UChar32 c; 
- 
-    if(pArgs->converter->mode<8) { 
-        return UCNV_GET_NEXT_UCHAR_USE_TO_U; 
-    } 
- 
-    s=(const uint8_t *)pArgs->source; 
-    sourceLimit=(const uint8_t *)pArgs->sourceLimit; 
- 
-    if(s>=sourceLimit) { 
-        /* no input */ 
-        *err=U_INDEX_OUTOFBOUNDS_ERROR; 
-        return 0xffff; 
-    } 
- 
-    if(s+2>sourceLimit) { 
-        /* only one byte: truncated UChar */ 
-        pArgs->converter->toUBytes[0]=*s++; 
-        pArgs->converter->toULength=1; 
-        pArgs->source=(const char *)s; 
-        *err = U_TRUNCATED_CHAR_FOUND; 
-        return 0xffff; 
-    } 
- 
-    /* get one UChar */ 
-    c=((UChar32)s[1]<<8)|*s; 
-    s+=2; 
- 
-    /* check for a surrogate pair */ 
-    if(U_IS_SURROGATE(c)) { 
-        if(U16_IS_SURROGATE_LEAD(c)) { 
-            if(s+2<=sourceLimit) { 
-                UChar trail; 
- 
-                /* get a second UChar and see if it is a trail surrogate */ 
-                trail=((UChar)s[1]<<8)|*s; 
-                if(U16_IS_TRAIL(trail)) { 
-                    c=U16_GET_SUPPLEMENTARY(c, trail); 
-                    s+=2; 
-                } else { 
-                    /* unmatched lead surrogate */ 
-                    c=-2; 
-                } 
-            } else { 
-                /* too few (2 or 3) bytes for a surrogate pair: truncated code point */ 
-                uint8_t *bytes=pArgs->converter->toUBytes; 
-                s-=2; 
-                pArgs->converter->toULength=(int8_t)(sourceLimit-s); 
-                do { 
-                    *bytes++=*s++; 
-                } while(s<sourceLimit); 
- 
-                c=0xffff; 
-                *err=U_TRUNCATED_CHAR_FOUND; 
-            } 
-        } else { 
-            /* unmatched trail surrogate */ 
-            c=-2; 
-        } 
- 
-        if(c<0) { 
-            /* write the unmatched surrogate */ 
-            uint8_t *bytes=pArgs->converter->toUBytes; 
-            pArgs->converter->toULength=2; 
-            *bytes=*(s-2); 
-            bytes[1]=*(s-1); 
- 
-            c=0xffff; 
-            *err=U_ILLEGAL_CHAR_FOUND; 
-        } 
-    } 
- 
-    pArgs->source=(const char *)s; 
-    return c; 
-}  
- 
-static void  U_CALLCONV 
-_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) { 
-    if(choice<=UCNV_RESET_TO_UNICODE) { 
-        /* reset toUnicode state */ 
-        if(UCNV_GET_VERSION(cnv)==0) { 
-            cnv->mode=8; /* no BOM handling */ 
-        } else { 
-            cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */ 
-        } 
-    } 
-    if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) { 
-        /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */ 
-        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 
-    } 
-} 
- 
-static void  U_CALLCONV 
-_UTF16LEOpen(UConverter *cnv, 
-             UConverterLoadArgs *pArgs, 
-             UErrorCode *pErrorCode) { 
-    (void)pArgs; 
-    if(UCNV_GET_VERSION(cnv)<=1) { 
-        _UTF16LEReset(cnv, UCNV_RESET_BOTH); 
-    } else { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-    } 
-} 
- 
-static const char *  U_CALLCONV 
-_UTF16LEGetName(const UConverter *cnv) { 
-    if(UCNV_GET_VERSION(cnv)==0) { 
-        return "UTF-16LE"; 
-    } else { 
-        return "UTF-16LE,version=1"; 
-    } 
-} 
-U_CDECL_END 
- 
-static const UConverterImpl _UTF16LEImpl={ 
-    UCNV_UTF16_LittleEndian, 
- 
-    NULL, 
-    NULL, 
- 
-    _UTF16LEOpen, 
-    NULL, 
-    _UTF16LEReset, 
- 
-    _UTF16LEToUnicodeWithOffsets, 
-    _UTF16LEToUnicodeWithOffsets, 
-    _UTF16LEFromUnicodeWithOffsets, 
-    _UTF16LEFromUnicodeWithOffsets, 
-    _UTF16LEGetNextUChar, 
- 
-    NULL, 
-    _UTF16LEGetName, 
-    NULL, 
-    NULL, 
-    ucnv_getNonSurrogateUnicodeSet, 
- 
-    NULL, 
-    NULL 
-}; 
- 
- 
-static const UConverterStaticData _UTF16LEStaticData={ 
-    sizeof(UConverterStaticData), 
-    "UTF-16LE", 
-    1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2, 
-    { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE, 
-    0, 
-    0, 
-    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
-}; 
- 
- 
-const UConverterSharedData _UTF16LEData= 
-        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl); 
- 
-/* UTF-16 (Detect BOM) ------------------------------------------------------ */ 
- 
-/* 
- * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE 
- * accordingly. 
- * This is a simpler version of the UTF-32 converter, with 
- * fewer states for shorter BOMs. 
- * 
- * State values: 
- * 0    initial state 
- * 1    saw first byte 
- * 2..5 - 
- * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1 
- * 8    UTF-16BE mode 
- * 9    UTF-16LE mode 
- * 
- * During detection: state==number of initial bytes seen so far. 
- * 
- * On output, emit U+FEFF as the first code point. 
- * 
- * Variants: 
- * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error. 
- * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and 
- *   UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error. 
- */ 
-U_CDECL_BEGIN 
-static void  U_CALLCONV 
-_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) { 
-    if(choice<=UCNV_RESET_TO_UNICODE) { 
-        /* reset toUnicode: state=0 */ 
-        cnv->mode=0; 
-    } 
-    if(choice!=UCNV_RESET_TO_UNICODE) { 
-        /* reset fromUnicode: prepare to output the UTF-16PE BOM */ 
-        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM; 
-    } 
-} 
-U_CDECL_END 
-extern const UConverterSharedData _UTF16v2Data; 
-U_CDECL_BEGIN 
-static void U_CALLCONV 
-_UTF16Open(UConverter *cnv, 
-           UConverterLoadArgs *pArgs, 
-           UErrorCode *pErrorCode) { 
-    if(UCNV_GET_VERSION(cnv)<=2) { 
-        if(UCNV_GET_VERSION(cnv)==2 && !pArgs->onlyTestIsLoadable) { 
-            /* 
-             * Switch implementation, and switch the staticData that's different 
-             * and was copied into the UConverter. 
-             * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.) 
-             * UTF-16,version=2 fromUnicode() always writes a big-endian byte stream. 
-             */ 
-            cnv->sharedData=(UConverterSharedData*)&_UTF16v2Data; 
-            uprv_memcpy(cnv->subChars, _UTF16v2Data.staticData->subChar, UCNV_MAX_SUBCHAR_LEN); 
-        } 
-        _UTF16Reset(cnv, UCNV_RESET_BOTH); 
-    } else { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-    } 
-} 
- 
-static const char *  U_CALLCONV 
-_UTF16GetName(const UConverter *cnv) { 
-    if(UCNV_GET_VERSION(cnv)==0) { 
-        return "UTF-16"; 
-    } else if(UCNV_GET_VERSION(cnv)==1) { 
-        return "UTF-16,version=1"; 
-    } else { 
-        return "UTF-16,version=2"; 
-    } 
-} 
-U_CDECL_END 
-extern const UConverterSharedData _UTF16Data; 
- 
-static inline bool IS_UTF16BE(const UConverter *cnv) { 
-    return ((cnv)->sharedData == &_UTF16BEData); 
-} 
- 
-static inline bool IS_UTF16LE(const UConverter *cnv) { 
-    return ((cnv)->sharedData == &_UTF16LEData); 
-} 
- 
-static inline bool IS_UTF16(const UConverter *cnv) { 
-    return ((cnv)->sharedData==&_UTF16Data) || ((cnv)->sharedData == &_UTF16v2Data); 
-} 
- 
-U_CDECL_BEGIN 
-static void U_CALLCONV 
-_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs, 
-                           UErrorCode *pErrorCode) { 
-    UConverter *cnv=pArgs->converter; 
-    const char *source=pArgs->source; 
-    const char *sourceLimit=pArgs->sourceLimit; 
-    int32_t *offsets=pArgs->offsets; 
- 
-    int32_t state, offsetDelta; 
-    uint8_t b; 
- 
-    state=cnv->mode; 
- 
-    /* 
-     * If we detect a BOM in this buffer, then we must add the BOM size to the 
-     * offsets because the actual converter function will not see and count the BOM. 
-     * offsetDelta will have the number of the BOM bytes that are in the current buffer. 
-     */ 
-    offsetDelta=0; 
- 
-    while(source<sourceLimit && U_SUCCESS(*pErrorCode)) { 
-        switch(state) { 
-        case 0: 
-            cnv->toUBytes[0]=(uint8_t)*source++; 
-            cnv->toULength=1; 
-            state=1; 
-            break; 
-        case 1: 
-            /* 
-             * Only inside this switch case can the state variable 
-             * temporarily take two additional values: 
-             * 6: BOM error, continue with BE 
-             * 7: BOM error, continue with LE 
-             */ 
-            b=*source; 
-            if(cnv->toUBytes[0]==0xfe && b==0xff) { 
-                if(IS_UTF16LE(cnv)) { 
-                    state=7; /* illegal reverse BOM for Java "UnicodeLittle" */ 
-                } else { 
-                    state=8; /* detect UTF-16BE */ 
-                } 
-            } else if(cnv->toUBytes[0]==0xff && b==0xfe) { 
-                if(IS_UTF16BE(cnv)) { 
-                    state=6; /* illegal reverse BOM for Java "UnicodeBig" */ 
-                } else { 
-                    state=9; /* detect UTF-16LE */ 
-                } 
-            } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) { 
-                state=6; /* illegal missing BOM for Java "Unicode" */ 
-            } 
-            if(state>=8) { 
-                /* BOM detected, consume it */ 
-                ++source; 
-                cnv->toULength=0; 
-                offsetDelta=(int32_t)(source-pArgs->source); 
-            } else if(state<6) { 
-                /* ok: no BOM, and not a reverse BOM */ 
-                if(source!=pArgs->source) { 
-                    /* reset the source for a correct first offset */ 
-                    source=pArgs->source; 
-                    cnv->toULength=0; 
-                } 
-                if(IS_UTF16LE(cnv)) { 
-                    /* Make Java "UnicodeLittle" default to LE. */ 
-                    state=9; 
-                } else { 
-                    /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */ 
-                    state=8; 
-                } 
-            } else { 
-                /* 
-                 * error: missing BOM, or reverse BOM 
-                 * UTF-16,version=1: Java-specific "Unicode" requires a BOM. 
-                 * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM. 
-                 * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM. 
-                 */ 
-                /* report the non-BOM or reverse BOM as an illegal sequence */ 
-                cnv->toUBytes[1]=b; 
-                cnv->toULength=2; 
-                pArgs->source=source+1; 
-                /* continue with conversion if the callback resets the error */ 
-                /* 
-                 * Make Java "Unicode" default to BE like standard UTF-16. 
-                 * Make Java "UnicodeBig" and "UnicodeLittle" default 
-                 * to their normal endiannesses. 
-                 */ 
-                cnv->mode=state+2; 
-                *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE; 
-                return; 
-            } 
-            /* convert the rest of the stream */ 
-            cnv->mode=state; 
-            continue; 
-        case 8: 
-            /* call UTF-16BE */ 
-            pArgs->source=source; 
-            _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 
-            source=pArgs->source; 
-            break; 
-        case 9: 
-            /* call UTF-16LE */ 
-            pArgs->source=source; 
-            _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 
-            source=pArgs->source; 
-            break; 
-        default: 
-            break; /* does not occur */ 
-        } 
-    } 
- 
-    /* add BOM size to offsets - see comment at offsetDelta declaration */ 
-    if(offsets!=NULL && offsetDelta!=0) { 
-        int32_t *offsetsLimit=pArgs->offsets; 
-        while(offsets<offsetsLimit) { 
-            *offsets++ += offsetDelta; 
-        } 
-    } 
- 
-    pArgs->source=source; 
- 
-    if(source==sourceLimit && pArgs->flush) { 
-        /* handle truncated input */ 
-        switch(state) { 
-        case 0: 
-            break; /* no input at all, nothing to do */ 
-        case 8: 
-            _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode); 
-            break; 
-        case 9: 
-            _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode); 
-            break; 
-        default: 
-            /* 0<state<8: framework will report truncation, nothing to do here */ 
-            break; 
-        } 
-    } 
- 
-    cnv->mode=state; 
-} 
- 
-static UChar32 U_CALLCONV 
-_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs, 
-                   UErrorCode *pErrorCode) { 
-    switch(pArgs->converter->mode) { 
-    case 8: 
-        return _UTF16BEGetNextUChar(pArgs, pErrorCode); 
-    case 9: 
-        return _UTF16LEGetNextUChar(pArgs, pErrorCode); 
-    default: 
-        return UCNV_GET_NEXT_UCHAR_USE_TO_U; 
-    } 
-} 
-U_CDECL_END 
- 
-static const UConverterImpl _UTF16Impl = { 
-    UCNV_UTF16, 
- 
-    NULL, 
-    NULL, 
- 
-    _UTF16Open, 
-    NULL, 
-    _UTF16Reset, 
- 
-    _UTF16ToUnicodeWithOffsets, 
-    _UTF16ToUnicodeWithOffsets, 
-    _UTF16PEFromUnicodeWithOffsets, 
-    _UTF16PEFromUnicodeWithOffsets, 
-    _UTF16GetNextUChar, 
- 
-    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 
-    _UTF16GetName, 
-    NULL, 
-    NULL, 
-    ucnv_getNonSurrogateUnicodeSet, 
- 
-    NULL, 
-    NULL 
-}; 
- 
-static const UConverterStaticData _UTF16StaticData = { 
-    sizeof(UConverterStaticData), 
-    "UTF-16", 
-    1204, /* CCSID for BOM sensitive UTF-16 */ 
-    UCNV_IBM, UCNV_UTF16, 2, 2, 
-#if U_IS_BIG_ENDIAN 
-    { 0xff, 0xfd, 0, 0 }, 2, 
-#else 
-    { 0xfd, 0xff, 0, 0 }, 2, 
-#endif 
-    FALSE, FALSE, 
-    0, 
-    0, 
-    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
-}; 
- 
-const UConverterSharedData _UTF16Data = 
-        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl); 
- 
-static const UConverterImpl _UTF16v2Impl = { 
-    UCNV_UTF16, 
- 
-    NULL, 
-    NULL, 
- 
-    _UTF16Open, 
-    NULL, 
-    _UTF16Reset, 
- 
-    _UTF16ToUnicodeWithOffsets, 
-    _UTF16ToUnicodeWithOffsets, 
-    _UTF16BEFromUnicodeWithOffsets, 
-    _UTF16BEFromUnicodeWithOffsets, 
-    _UTF16GetNextUChar, 
- 
-    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */ 
-    _UTF16GetName, 
-    NULL, 
-    NULL, 
-    ucnv_getNonSurrogateUnicodeSet, 
- 
-    NULL, 
-    NULL 
-}; 
- 
-static const UConverterStaticData _UTF16v2StaticData = { 
-    sizeof(UConverterStaticData), 
-    "UTF-16,version=2", 
-    1204, /* CCSID for BOM sensitive UTF-16 */ 
-    UCNV_IBM, UCNV_UTF16, 2, 2, 
-    { 0xff, 0xfd, 0, 0 }, 2, 
-    FALSE, FALSE, 
-    0, 
-    0, 
-    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */ 
-}; 
- 
-const UConverterSharedData _UTF16v2Data = 
-        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl); 
- 
-#endif 
+
+static void  U_CALLCONV
+_UTF16LEReset(UConverter *cnv, UConverterResetChoice choice) {
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        /* reset toUnicode state */
+        if(UCNV_GET_VERSION(cnv)==0) {
+            cnv->mode=8; /* no BOM handling */
+        } else {
+            cnv->mode=0; /* Java-specific "UnicodeLittle" requires LE BOM or no BOM */
+        }
+    }
+    if(choice!=UCNV_RESET_TO_UNICODE && UCNV_GET_VERSION(cnv)==1) {
+        /* reset fromUnicode for "UnicodeLittle": prepare to output the UTF-16LE BOM */
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
+    }
+}
+
+static void  U_CALLCONV
+_UTF16LEOpen(UConverter *cnv,
+             UConverterLoadArgs *pArgs,
+             UErrorCode *pErrorCode) {
+    (void)pArgs;
+    if(UCNV_GET_VERSION(cnv)<=1) {
+        _UTF16LEReset(cnv, UCNV_RESET_BOTH);
+    } else {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    }
+}
+
+static const char *  U_CALLCONV
+_UTF16LEGetName(const UConverter *cnv) {
+    if(UCNV_GET_VERSION(cnv)==0) {
+        return "UTF-16LE";
+    } else {
+        return "UTF-16LE,version=1";
+    }
+}
+U_CDECL_END
+
+static const UConverterImpl _UTF16LEImpl={
+    UCNV_UTF16_LittleEndian,
+
+    NULL,
+    NULL,
+
+    _UTF16LEOpen,
+    NULL,
+    _UTF16LEReset,
+
+    _UTF16LEToUnicodeWithOffsets,
+    _UTF16LEToUnicodeWithOffsets,
+    _UTF16LEFromUnicodeWithOffsets,
+    _UTF16LEFromUnicodeWithOffsets,
+    _UTF16LEGetNextUChar,
+
+    NULL,
+    _UTF16LEGetName,
+    NULL,
+    NULL,
+    ucnv_getNonSurrogateUnicodeSet,
+
+    NULL,
+    NULL
+};
+
+
+static const UConverterStaticData _UTF16LEStaticData={
+    sizeof(UConverterStaticData),
+    "UTF-16LE",
+    1202, UCNV_IBM, UCNV_UTF16_LittleEndian, 2, 2,
+    { 0xfd, 0xff, 0, 0 },2,FALSE,FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+
+const UConverterSharedData _UTF16LEData=
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16LEStaticData, &_UTF16LEImpl);
+
+/* UTF-16 (Detect BOM) ------------------------------------------------------ */
+
+/*
+ * Detect a BOM at the beginning of the stream and select UTF-16BE or UTF-16LE
+ * accordingly.
+ * This is a simpler version of the UTF-32 converter, with
+ * fewer states for shorter BOMs.
+ *
+ * State values:
+ * 0    initial state
+ * 1    saw first byte
+ * 2..5 -
+ * 6..7 see _UTF16ToUnicodeWithOffsets() comments in state 1
+ * 8    UTF-16BE mode
+ * 9    UTF-16LE mode
+ *
+ * During detection: state==number of initial bytes seen so far.
+ *
+ * On output, emit U+FEFF as the first code point.
+ *
+ * Variants:
+ * - UTF-16,version=1 (Java "Unicode" encoding) treats a missing BOM as an error.
+ * - UTF-16BE,version=1 (Java "UnicodeBig" encoding) and
+ *   UTF-16LE,version=1 (Java "UnicodeLittle" encoding) treat a reverse BOM as an error.
+ */
+U_CDECL_BEGIN
+static void  U_CALLCONV
+_UTF16Reset(UConverter *cnv, UConverterResetChoice choice) {
+    if(choice<=UCNV_RESET_TO_UNICODE) {
+        /* reset toUnicode: state=0 */
+        cnv->mode=0;
+    }
+    if(choice!=UCNV_RESET_TO_UNICODE) {
+        /* reset fromUnicode: prepare to output the UTF-16PE BOM */
+        cnv->fromUnicodeStatus=UCNV_NEED_TO_WRITE_BOM;
+    }
+}
+U_CDECL_END
+extern const UConverterSharedData _UTF16v2Data;
+U_CDECL_BEGIN
+static void U_CALLCONV
+_UTF16Open(UConverter *cnv,
+           UConverterLoadArgs *pArgs,
+           UErrorCode *pErrorCode) {
+    /*
+     * Open callback for "UTF-16": accepts versions 0..2, rejects anything
+     * higher with U_ILLEGAL_ARGUMENT_ERROR, and leaves the converter reset
+     * in both directions.
+     */
+    if(UCNV_GET_VERSION(cnv)>2) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if(UCNV_GET_VERSION(cnv)==2) {
+        if(!pArgs->onlyTestIsLoadable) {
+            /*
+             * UTF-16,version=2 fromUnicode() always writes a big-endian byte
+             * stream, so this converter gets a different implementation.
+             * The staticData differs as well; the part of it that was already
+             * copied into the UConverter (the substitution character) has to
+             * be replaced too.
+             * (See ucnv_createConverterFromSharedData() in ucnv_bld.c.)
+             */
+            const UConverterSharedData *v2Data=&_UTF16v2Data;
+
+            cnv->sharedData=(UConverterSharedData*)v2Data;
+            uprv_memcpy(cnv->subChars, v2Data->staticData->subChar, UCNV_MAX_SUBCHAR_LEN);
+        }
+    }
+
+    _UTF16Reset(cnv, UCNV_RESET_BOTH);
+}
+
+static const char *  U_CALLCONV
+_UTF16GetName(const UConverter *cnv) {
+    /*
+     * Map the converter's version option to its canonical name.
+     * Any version other than 0 or 1 is reported as version 2.
+     */
+    switch(UCNV_GET_VERSION(cnv)) {
+    case 0:
+        return "UTF-16";
+    case 1:
+        return "UTF-16,version=1";
+    default:
+        return "UTF-16,version=2";
+    }
+}
+U_CDECL_END
+extern const UConverterSharedData _UTF16Data;
+
+static inline bool IS_UTF16BE(const UConverter *cnv) {
+    /* true if the converter is bound to the UTF-16BE shared data object */
+    const UConverterSharedData *shared=cnv->sharedData;
+    return shared==&_UTF16BEData;
+}
+
+static inline bool IS_UTF16LE(const UConverter *cnv) {
+    /* true if the converter is bound to the UTF-16LE shared data object */
+    const UConverterSharedData *shared=cnv->sharedData;
+    return shared==&_UTF16LEData;
+}
+
+static inline bool IS_UTF16(const UConverter *cnv) {
+    /*
+     * true if the converter is bound to one of the BOM-detecting "UTF-16"
+     * shared data objects (the default one or the version=2 one)
+     */
+    const UConverterSharedData *shared=cnv->sharedData;
+    if(shared==&_UTF16Data) {
+        return true;
+    }
+    return shared==&_UTF16v2Data;
+}
+
+U_CDECL_BEGIN
+/*
+ * toUnicode implementation of the BOM-detecting "UTF-16" converter; it is also
+ * used for the Java-specific "with BOM" variants of UTF-16BE and UTF-16LE.
+ *
+ * Runs the BOM-detection state machine stored in cnv->mode (states 0 and 1)
+ * and, once the byte order is decided (state 8 or 9), hands the remaining
+ * input to _UTF16BEToUnicodeWithOffsets() or _UTF16LEToUnicodeWithOffsets().
+ * The state is written back to cnv->mode so that detection can continue
+ * across calls when the first two bytes arrive in different buffers.
+ *
+ * pArgs      - conversion arguments; pArgs->source is updated to the position
+ *              reached, and offsets written during this call are increased by
+ *              the size of a BOM that was consumed from this buffer.
+ * pErrorCode - set to U_ILLEGAL_ESCAPE_SEQUENCE for a reverse BOM
+ *              (UTF-16BE/LE variants) or a missing BOM (UTF-16,version=1);
+ *              any failure reported through it ends the conversion loop.
+ */
+static void U_CALLCONV
+_UTF16ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
+                           UErrorCode *pErrorCode) {
+    UConverter *cnv=pArgs->converter;
+    const char *source=pArgs->source;
+    const char *sourceLimit=pArgs->sourceLimit;
+    /* remember where this call's offsets start; needed for the BOM adjustment below */
+    int32_t *offsets=pArgs->offsets;
+
+    int32_t state, offsetDelta;
+    uint8_t b;
+
+    /* resume the state machine where the previous call left it */
+    state=cnv->mode;
+
+    /*
+     * If we detect a BOM in this buffer, then we must add the BOM size to the
+     * offsets because the actual converter function will not see and count the BOM.
+     * offsetDelta will have the number of the BOM bytes that are in the current buffer.
+     */
+    offsetDelta=0;
+
+    while(source<sourceLimit && U_SUCCESS(*pErrorCode)) {
+        switch(state) {
+        case 0:
+            /* stash the first byte; the decision needs the second byte as well */
+            cnv->toUBytes[0]=(uint8_t)*source++;
+            cnv->toULength=1;
+            state=1;
+            break;
+        case 1:
+            /*
+             * Only inside this switch case can the state variable
+             * temporarily take two additional values:
+             * 6: BOM error, continue with BE
+             * 7: BOM error, continue with LE
+             */
+            /* peek at the second byte without consuming it yet */
+            b=*source;
+            if(cnv->toUBytes[0]==0xfe && b==0xff) {
+                if(IS_UTF16LE(cnv)) {
+                    state=7; /* illegal reverse BOM for Java "UnicodeLittle" */
+                } else {
+                    state=8; /* detect UTF-16BE */
+                }
+            } else if(cnv->toUBytes[0]==0xff && b==0xfe) {
+                if(IS_UTF16BE(cnv)) {
+                    state=6; /* illegal reverse BOM for Java "UnicodeBig" */
+                } else {
+                    state=9; /* detect UTF-16LE */
+                }
+            } else if((IS_UTF16(cnv) && UCNV_GET_VERSION(cnv)==1)) {
+                state=6; /* illegal missing BOM for Java "Unicode" */
+            }
+            /* at this point state is 1 (no BOM), 6/7 (error) or 8/9 (BOM found) */
+            if(state>=8) {
+                /* BOM detected, consume it */
+                ++source;
+                cnv->toULength=0;
+                offsetDelta=(int32_t)(source-pArgs->source);
+            } else if(state<6) {
+                /* ok: no BOM, and not a reverse BOM */
+                if(source!=pArgs->source) {
+                    /* reset the source for a correct first offset */
+                    /*
+                     * The first byte was read from this buffer, so rewinding
+                     * lets the delegated converter read it again. Otherwise it
+                     * came from an earlier buffer and stays in toUBytes[].
+                     */
+                    source=pArgs->source;
+                    cnv->toULength=0;
+                }
+                if(IS_UTF16LE(cnv)) {
+                    /* Make Java "UnicodeLittle" default to LE. */
+                    state=9;
+                } else {
+                    /* Make standard UTF-16 and Java "UnicodeBig" default to BE. */
+                    state=8;
+                }
+            } else {
+                /*
+                 * error: missing BOM, or reverse BOM
+                 * UTF-16,version=1: Java-specific "Unicode" requires a BOM.
+                 * UTF-16BE,version=1: Java-specific "UnicodeBig" requires a BE BOM or no BOM.
+                 * UTF-16LE,version=1: Java-specific "UnicodeLittle" requires an LE BOM or no BOM.
+                 */
+                /* report the non-BOM or reverse BOM as an illegal sequence */
+                cnv->toUBytes[1]=b;
+                cnv->toULength=2;
+                pArgs->source=source+1;
+                /* continue with conversion if the callback resets the error */
+                /*
+                 * Make Java "Unicode" default to BE like standard UTF-16.
+                 * Make Java "UnicodeBig" and "UnicodeLittle" default
+                 * to their normal endiannesses.
+                 */
+                /* 6 becomes 8 (BE mode), 7 becomes 9 (LE mode) */
+                cnv->mode=state+2;
+                *pErrorCode=U_ILLEGAL_ESCAPE_SEQUENCE;
+                return;
+            }
+            /* convert the rest of the stream */
+            cnv->mode=state;
+            continue;
+        case 8:
+            /* call UTF-16BE */
+            pArgs->source=source;
+            _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
+            source=pArgs->source;
+            break;
+        case 9:
+            /* call UTF-16LE */
+            pArgs->source=source;
+            _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
+            source=pArgs->source;
+            break;
+        default:
+            break; /* does not occur */
+        }
+    }
+
+    /* add BOM size to offsets - see comment at offsetDelta declaration */
+    if(offsets!=NULL && offsetDelta!=0) {
+        /* adjust every offset between the entry value of pArgs->offsets and its current value */
+        int32_t *offsetsLimit=pArgs->offsets;
+        while(offsets<offsetsLimit) {
+            *offsets++ += offsetDelta;
+        }
+    }
+
+    pArgs->source=source;
+
+    if(source==sourceLimit && pArgs->flush) {
+        /* handle truncated input */
+        switch(state) {
+        case 0:
+            break; /* no input at all, nothing to do */
+        case 8:
+            _UTF16BEToUnicodeWithOffsets(pArgs, pErrorCode);
+            break;
+        case 9:
+            _UTF16LEToUnicodeWithOffsets(pArgs, pErrorCode);
+            break;
+        default:
+            /* 0<state<8: framework will report truncation, nothing to do here */
+            break;
+        }
+    }
+
+    /* persist the state for the next call */
+    cnv->mode=state;
+}
+
+static UChar32 U_CALLCONV
+_UTF16GetNextUChar(UConverterToUnicodeArgs *pArgs,
+                   UErrorCode *pErrorCode) {
+    /*
+     * Dispatch on the detection state: once the byte order is known
+     * (mode 8 = UTF-16BE, mode 9 = UTF-16LE) use the matching
+     * single-code-point getter.
+     */
+    const int32_t mode=pArgs->converter->mode;
+
+    if(mode==8) {
+        return _UTF16BEGetNextUChar(pArgs, pErrorCode);
+    }
+    if(mode==9) {
+        return _UTF16LEGetNextUChar(pArgs, pErrorCode);
+    }
+    /*
+     * Byte order not decided yet: return the sentinel instead of a code point
+     * (presumably this makes the caller fall back to the toUnicode function;
+     * confirm against the framework).
+     */
+    return UCNV_GET_NEXT_UCHAR_USE_TO_U;
+}
+U_CDECL_END
+
+/*
+ * Default function table of the BOM-detecting "UTF-16" converter.
+ * _UTF16Open() replaces the converter's shared data with _UTF16v2Data (and
+ * thereby this table with _UTF16v2Impl) for UTF-16,version=2.
+ * NOTE(review): the slot labels below are inferred from the names of the
+ * functions stored in them; confirm against the UConverterImpl declaration.
+ */
+static const UConverterImpl _UTF16Impl = {
+    UCNV_UTF16, /* converter type */
+
+    NULL,
+    NULL,
+
+    _UTF16Open, /* open */
+    NULL,
+    _UTF16Reset, /* reset */
+
+    /* toUnicode: the same function fills both slots */
+    _UTF16ToUnicodeWithOffsets,
+    _UTF16ToUnicodeWithOffsets,
+    /* fromUnicode: the "PE" writer fills both slots */
+    _UTF16PEFromUnicodeWithOffsets,
+    _UTF16PEFromUnicodeWithOffsets,
+    _UTF16GetNextUChar, /* getNextUChar */
+
+    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
+    _UTF16GetName, /* getName */
+    NULL,
+    NULL,
+    ucnv_getNonSurrogateUnicodeSet, /* getUnicodeSet */
+
+    NULL,
+    NULL
+};
+
+/*
+ * Static description of the default "UTF-16" converter.
+ * The substitution bytes are U+FFFD in the platform's byte order, selected
+ * at compile time by U_IS_BIG_ENDIAN.
+ * NOTE(review): the meaning of the unlabelled positional fields is not
+ * visible here; confirm against the UConverterStaticData declaration.
+ */
+static const UConverterStaticData _UTF16StaticData = {
+    sizeof(UConverterStaticData),
+    "UTF-16", /* converter name */
+    1204, /* CCSID for BOM sensitive UTF-16 */
+    UCNV_IBM, UCNV_UTF16, 2, 2, /* presumably platform, type, min/max bytes per char - TODO confirm */
+#if U_IS_BIG_ENDIAN
+    { 0xff, 0xfd, 0, 0 }, 2, /* subChar bytes (U+FFFD, big-endian) and their length */
+#else
+    { 0xfd, 0xff, 0, 0 }, 2, /* subChar bytes (U+FFFD, little-endian) and their length */
+#endif
+    FALSE, FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+/*
+ * Shared data object of the default "UTF-16" converter: combines
+ * _UTF16StaticData with the _UTF16Impl function table.
+ * IS_UTF16() compares cnv->sharedData against its address.
+ */
+const UConverterSharedData _UTF16Data =
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16StaticData, &_UTF16Impl);
+
+/*
+ * Function table for UTF-16,version=2. It differs from _UTF16Impl only in the
+ * fromUnicode slots, which use the big-endian writer: version=2 fromUnicode()
+ * always writes a big-endian byte stream.
+ * NOTE(review): the slot labels below are inferred from the names of the
+ * functions stored in them; confirm against the UConverterImpl declaration.
+ */
+static const UConverterImpl _UTF16v2Impl = {
+    UCNV_UTF16, /* converter type */
+
+    NULL,
+    NULL,
+
+    _UTF16Open, /* open */
+    NULL,
+    _UTF16Reset, /* reset */
+
+    /* toUnicode: the same function fills both slots */
+    _UTF16ToUnicodeWithOffsets,
+    _UTF16ToUnicodeWithOffsets,
+    /* fromUnicode: always big-endian for version=2 */
+    _UTF16BEFromUnicodeWithOffsets,
+    _UTF16BEFromUnicodeWithOffsets,
+    _UTF16GetNextUChar, /* getNextUChar */
+
+    NULL, /* ### TODO implement getStarters for all Unicode encodings?! */
+    _UTF16GetName, /* getName */
+    NULL,
+    NULL,
+    ucnv_getNonSurrogateUnicodeSet, /* getUnicodeSet */
+
+    NULL,
+    NULL
+};
+
+/*
+ * Static description of UTF-16,version=2.
+ * Unlike _UTF16StaticData, the substitution bytes are always U+FFFD in
+ * big-endian order, independent of the platform. _UTF16Open() copies them
+ * into the converter when it switches to version 2.
+ * NOTE(review): the meaning of the unlabelled positional fields is not
+ * visible here; confirm against the UConverterStaticData declaration.
+ */
+static const UConverterStaticData _UTF16v2StaticData = {
+    sizeof(UConverterStaticData),
+    "UTF-16,version=2", /* converter name */
+    1204, /* CCSID for BOM sensitive UTF-16 */
+    UCNV_IBM, UCNV_UTF16, 2, 2, /* presumably platform, type, min/max bytes per char - TODO confirm */
+    { 0xff, 0xfd, 0, 0 }, 2, /* subChar bytes (U+FFFD, big-endian) and their length */
+    FALSE, FALSE,
+    0,
+    0,
+    { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
+};
+
+/*
+ * Shared data object for UTF-16,version=2: combines _UTF16v2StaticData with
+ * the _UTF16v2Impl function table.
+ * _UTF16Open() installs it as cnv->sharedData for version 2, and IS_UTF16()
+ * compares cnv->sharedData against its address.
+ */
+const UConverterSharedData _UTF16v2Data =
+        UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_UTF16v2StaticData, &_UTF16v2Impl);
+
+#endif
author	mcheshkov <mcheshkov@yandex-team.ru>	2022-02-10 16:46:16 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:46:16 +0300
commit	1312621288956f199a5bd5342b0133d4395fa725 (patch)
tree	1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/ucnv_u16.cpp
parent	e9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff)
download	ydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz