Restoring authorship annotation for <neksard@yandex-team.ru>. Commit 2 of 2.

author: neksard <neksard@yandex-team.ru> 2022-02-10 16:45:33 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:33 +0300
commit: 1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree: b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/unames.cpp
parent: 8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download: ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz
1 files changed, 2068 insertions, 2068 deletions
diff --git a/contrib/libs/icu/common/unames.cpp b/contrib/libs/icu/common/unames.cpp
index cf44d43613..5776058f95 100644
--- a/contrib/libs/icu/common/unames.cpp
+++ b/contrib/libs/icu/common/unames.cpp
@@ -1,1578 +1,1578 @@
 // © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html 
-/* 
-****************************************************************************** 
-* 
-*   Copyright (C) 1999-2014, International Business Machines 
-*   Corporation and others.  All Rights Reserved. 
-* 
-****************************************************************************** 
-*   file name:  unames.c 
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*   file name:  unames.c
 *   encoding:   UTF-8
-*   tab size:   8 (not used) 
-*   indentation:4 
-* 
-*   created on: 1999oct04 
-*   created by: Markus W. Scherer 
-*/ 
- 
-#include "unicode/utypes.h" 
-#include "unicode/putil.h" 
-#include "unicode/uchar.h" 
-#include "unicode/udata.h" 
-#include "unicode/utf.h" 
-#include "unicode/utf16.h" 
-#include "uassert.h" 
-#include "ustr_imp.h" 
-#include "umutex.h" 
-#include "cmemory.h" 
-#include "cstring.h" 
-#include "ucln_cmn.h" 
-#include "udataswp.h" 
-#include "uprops.h" 
- 
-U_NAMESPACE_BEGIN 
- 
-/* prototypes ------------------------------------------------------------- */ 
- 
-static const char DATA_NAME[] = "unames"; 
-static const char DATA_TYPE[] = "icu"; 
- 
-#define GROUP_SHIFT 5 
-#define LINES_PER_GROUP (1L<<GROUP_SHIFT) 
-#define GROUP_MASK (LINES_PER_GROUP-1) 
- 
-/* 
- * This struct was replaced by explicitly accessing equivalent 
- * fields from triples of uint16_t. 
- * The Group struct was padded to 8 bytes on compilers for early ARM CPUs, 
- * which broke the assumption that sizeof(Group)==6 and that the ++ operator 
- * would advance by 6 bytes (3 uint16_t). 
- * 
- * We can't just change the data structure because it's loaded from a data file, 
- * and we don't want to make it less compact, so we changed the access code. 
- * 
- * For details see ICU tickets 6331 and 6008. 
-typedef struct { 
-    uint16_t groupMSB, 
-             offsetHigh, offsetLow; / * avoid padding * / 
-} Group; 
- */ 
-enum { 
-    GROUP_MSB, 
-    GROUP_OFFSET_HIGH, 
-    GROUP_OFFSET_LOW, 
-    GROUP_LENGTH 
-}; 
- 
-/* 
- * Get the 32-bit group offset. 
- * @param group (const uint16_t *) pointer to a Group triple of uint16_t 
- * @return group offset (int32_t) 
- */ 
-#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]) 
- 
-#define NEXT_GROUP(group) ((group)+GROUP_LENGTH) 
-#define PREV_GROUP(group) ((group)-GROUP_LENGTH) 
- 
-typedef struct { 
-    uint32_t start, end; 
-    uint8_t type, variant; 
-    uint16_t size; 
-} AlgorithmicRange; 
- 
-typedef struct { 
-    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset; 
-} UCharNames; 
- 
-/* 
- * Get the groups table from a UCharNames struct. 
- * The groups table consists of one uint16_t groupCount followed by 
- * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH 
- * and the comment for the old struct Group above. 
- * 
- * @param names (const UCharNames *) pointer to the UCharNames indexes 
- * @return (const uint16_t *) pointer to the groups table 
- */ 
-#define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset) 
- 
-typedef struct { 
-    const char *otherName; 
-    UChar32 code; 
-} FindName; 
- 
-#define DO_FIND_NAME NULL 
- 
-static UDataMemory *uCharNamesData=NULL; 
-static UCharNames *uCharNames=NULL; 
-static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER; 
- 
-/* 
- * Maximum length of character names (regular & 1.0). 
- */ 
-static int32_t gMaxNameLength=0; 
- 
-/* 
- * Set of chars used in character names (regular & 1.0). 
- * Chars are platform-dependent (can be EBCDIC). 
- */ 
-static uint32_t gNameSet[8]={ 0 }; 
- 
-#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT 
-#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1 
-#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2 
- 
-#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) 
- 
-static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = { 
-    "unassigned", 
-    "uppercase letter", 
-    "lowercase letter", 
-    "titlecase letter", 
-    "modifier letter", 
-    "other letter", 
-    "non spacing mark", 
-    "enclosing mark", 
-    "combining spacing mark", 
-    "decimal digit number", 
-    "letter number", 
-    "other number", 
-    "space separator", 
-    "line separator", 
-    "paragraph separator", 
-    "control", 
-    "format", 
-    "private use area", 
-    "surrogate", 
-    "dash punctuation",    
-    "start punctuation", 
-    "end punctuation", 
-    "connector punctuation", 
-    "other punctuation", 
-    "math symbol", 
-    "currency symbol", 
-    "modifier symbol", 
-    "other symbol", 
-    "initial punctuation", 
-    "final punctuation", 
-    "noncharacter", 
-    "lead surrogate", 
-    "trail surrogate" 
-}; 
- 
-/* implementation ----------------------------------------------------------- */ 
- 
-static UBool U_CALLCONV unames_cleanup(void) 
-{ 
-    if(uCharNamesData) { 
-        udata_close(uCharNamesData); 
-        uCharNamesData = NULL; 
-    } 
-    if(uCharNames) { 
-        uCharNames = NULL; 
-    } 
-    gCharNamesInitOnce.reset(); 
-    gMaxNameLength=0; 
-    return TRUE; 
-} 
- 
-static UBool U_CALLCONV 
-isAcceptable(void * /*context*/, 
-             const char * /*type*/, const char * /*name*/, 
-             const UDataInfo *pInfo) { 
-    return (UBool)( 
-        pInfo->size>=20 && 
-        pInfo->isBigEndian==U_IS_BIG_ENDIAN && 
-        pInfo->charsetFamily==U_CHARSET_FAMILY && 
-        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */ 
-        pInfo->dataFormat[1]==0x6e && 
-        pInfo->dataFormat[2]==0x61 && 
-        pInfo->dataFormat[3]==0x6d && 
-        pInfo->formatVersion[0]==1); 
-} 
- 
-static void U_CALLCONV 
-loadCharNames(UErrorCode &status) { 
-    U_ASSERT(uCharNamesData == NULL); 
-    U_ASSERT(uCharNames == NULL); 
- 
-    uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status); 
-    if(U_FAILURE(status)) { 
-        uCharNamesData = NULL; 
-    } else { 
-        uCharNames = (UCharNames *)udata_getMemory(uCharNamesData); 
-    } 
-    ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup); 
-} 
- 
- 
-static UBool 
-isDataLoaded(UErrorCode *pErrorCode) { 
-    umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode); 
-    return U_SUCCESS(*pErrorCode); 
-} 
- 
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct04
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/udata.h"
+#include "unicode/utf.h"
+#include "unicode/utf16.h"
+#include "uassert.h"
+#include "ustr_imp.h"
+#include "umutex.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "ucln_cmn.h"
+#include "udataswp.h"
+#include "uprops.h"
+
+U_NAMESPACE_BEGIN
+
+/* prototypes ------------------------------------------------------------- */
+
+static const char DATA_NAME[] = "unames";
+static const char DATA_TYPE[] = "icu";
+
+#define GROUP_SHIFT 5
+#define LINES_PER_GROUP (1L<<GROUP_SHIFT)
+#define GROUP_MASK (LINES_PER_GROUP-1)
+
+/*
+ * This struct was replaced by explicitly accessing equivalent
+ * fields from triples of uint16_t.
+ * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
+ * which broke the assumption that sizeof(Group)==6 and that the ++ operator
+ * would advance by 6 bytes (3 uint16_t).
+ *
+ * We can't just change the data structure because it's loaded from a data file,
+ * and we don't want to make it less compact, so we changed the access code.
+ *
+ * For details see ICU tickets 6331 and 6008.
+typedef struct {
+    uint16_t groupMSB,
+             offsetHigh, offsetLow; / * avoid padding * /
+} Group;
+ */
+enum {
+    GROUP_MSB,
+    GROUP_OFFSET_HIGH,
+    GROUP_OFFSET_LOW,
+    GROUP_LENGTH
+};
+
+/*
+ * Get the 32-bit group offset.
+ * @param group (const uint16_t *) pointer to a Group triple of uint16_t
+ * @return group offset (int32_t)
+ */
+#define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
+
+#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
+#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
+
+typedef struct {
+    uint32_t start, end;
+    uint8_t type, variant;
+    uint16_t size;
+} AlgorithmicRange;
+
+typedef struct {
+    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
+} UCharNames;
+
+/*
+ * Get the groups table from a UCharNames struct.
+ * The groups table consists of one uint16_t groupCount followed by
+ * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
+ * and the comment for the old struct Group above.
+ *
+ * @param names (const UCharNames *) pointer to the UCharNames indexes
+ * @return (const uint16_t *) pointer to the groups table
+ */
+#define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
+
+typedef struct {
+    const char *otherName;
+    UChar32 code;
+} FindName;
+
+#define DO_FIND_NAME NULL
+
+static UDataMemory *uCharNamesData=NULL;
+static UCharNames *uCharNames=NULL;
+static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
+
+/*
+ * Maximum length of character names (regular & 1.0).
+ */
+static int32_t gMaxNameLength=0;
+
+/*
+ * Set of chars used in character names (regular & 1.0).
+ * Chars are platform-dependent (can be EBCDIC).
+ */
+static uint32_t gNameSet[8]={ 0 };
+
+#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
+#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
+#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
+
+#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
+
+static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
+    "unassigned",
+    "uppercase letter",
+    "lowercase letter",
+    "titlecase letter",
+    "modifier letter",
+    "other letter",
+    "non spacing mark",
+    "enclosing mark",
+    "combining spacing mark",
+    "decimal digit number",
+    "letter number",
+    "other number",
+    "space separator",
+    "line separator",
+    "paragraph separator",
+    "control",
+    "format",
+    "private use area",
+    "surrogate",
+    "dash punctuation",   
+    "start punctuation",
+    "end punctuation",
+    "connector punctuation",
+    "other punctuation",
+    "math symbol",
+    "currency symbol",
+    "modifier symbol",
+    "other symbol",
+    "initial punctuation",
+    "final punctuation",
+    "noncharacter",
+    "lead surrogate",
+    "trail surrogate"
+};
+
+/* implementation ----------------------------------------------------------- */
+
+static UBool U_CALLCONV unames_cleanup(void)
+{
+    if(uCharNamesData) {
+        udata_close(uCharNamesData);
+        uCharNamesData = NULL;
+    }
+    if(uCharNames) {
+        uCharNames = NULL;
+    }
+    gCharNamesInitOnce.reset();
+    gMaxNameLength=0;
+    return TRUE;
+}
+
+static UBool U_CALLCONV
+isAcceptable(void * /*context*/,
+             const char * /*type*/, const char * /*name*/,
+             const UDataInfo *pInfo) {
+    return (UBool)(
+        pInfo->size>=20 &&
+        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
+        pInfo->charsetFamily==U_CHARSET_FAMILY &&
+        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x61 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==1);
+}
+
+static void U_CALLCONV
+loadCharNames(UErrorCode &status) {
+    U_ASSERT(uCharNamesData == NULL);
+    U_ASSERT(uCharNames == NULL);
+
+    uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
+    if(U_FAILURE(status)) {
+        uCharNamesData = NULL;
+    } else {
+        uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
+    }
+    ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
+}
+
+
+static UBool
+isDataLoaded(UErrorCode *pErrorCode) {
+    umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
+    return U_SUCCESS(*pErrorCode);
+}
+
 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
-    if((bufferLength)>0) { \ 
-        *(buffer)++=c; \ 
-        --(bufferLength); \ 
-    } \ 
-    ++(bufferPos); \ 
+    if((bufferLength)>0) { \
+        *(buffer)++=c; \
+        --(bufferLength); \
+    } \
+    ++(bufferPos); \
 } UPRV_BLOCK_MACRO_END
- 
-#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT 
- 
-/* 
- * Important: expandName() and compareName() are almost the same - 
- * apply fixes to both. 
- * 
- * UnicodeData.txt uses ';' as a field separator, so no 
- * field can contain ';' as part of its contents. 
- * In unames.dat, it is marked as token[';']==-1 only if the 
- * semicolon is used in the data file - which is iff we 
- * have Unicode 1.0 names or ISO comments or aliases. 
- * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases 
- * although we know that it will never be part of a name. 
- */ 
-static uint16_t 
-expandName(UCharNames *names, 
-           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 
-           char *buffer, uint16_t bufferLength) { 
-    uint16_t *tokens=(uint16_t *)names+8; 
-    uint16_t token, tokenCount=*tokens++, bufferPos=0; 
-    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 
-    uint8_t c; 
- 
-    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 
-        /* 
-         * skip the modern name if it is not requested _and_ 
-         * if the semicolon byte value is a character, not a token number 
-         */ 
-        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 
-            int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; 
-            do { 
-                while(nameLength>0) { 
-                    --nameLength; 
-                    if(*name++==';') { 
-                        break; 
-                    } 
-                } 
-            } while(--fieldIndex>0); 
-        } else { 
-            /* 
-             * the semicolon byte value is a token number, therefore 
-             * only modern names are stored in unames.dat and there is no 
-             * such requested alternate name here 
-             */ 
-            nameLength=0; 
-        } 
-    } 
- 
-    /* write each letter directly, and write a token word per token */ 
-    while(nameLength>0) { 
-        --nameLength; 
-        c=*name++; 
- 
-        if(c>=tokenCount) { 
-            if(c!=';') { 
-                /* implicit letter */ 
-                WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-            } else { 
-                /* finished */ 
-                break; 
-            } 
-        } else { 
-            token=tokens[c]; 
-            if(token==(uint16_t)(-2)) { 
-                /* this is a lead byte for a double-byte token */ 
-                token=tokens[c<<8|*name++]; 
-                --nameLength; 
-            } 
-            if(token==(uint16_t)(-1)) { 
-                if(c!=';') { 
-                    /* explicit letter */ 
-                    WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-                } else { 
-                    /* stop, but skip the semicolon if we are seeking 
-                       extended names and there was no 2.0 name but there 
-                       is a 1.0 name. */ 
-                    if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) { 
-                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 
-                            continue; 
-                        } 
-                    } 
-                    /* finished */ 
-                    break; 
-                } 
-            } else { 
-                /* write token word */ 
-                uint8_t *tokenString=tokenStrings+token; 
-                while((c=*tokenString++)!=0) { 
-                    WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-                } 
-            } 
-        } 
-    } 
- 
-    /* zero-terminate */ 
-    if(bufferLength>0) { 
-        *buffer=0; 
-    } 
- 
-    return bufferPos; 
-} 
- 
-/* 
- * compareName() is almost the same as expandName() except that it compares 
- * the currently expanded name to an input name. 
- * It returns the match/no match result as soon as possible. 
- */ 
-static UBool 
-compareName(UCharNames *names, 
-            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, 
-            const char *otherName) { 
-    uint16_t *tokens=(uint16_t *)names+8; 
-    uint16_t token, tokenCount=*tokens++; 
-    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset; 
-    uint8_t c; 
-    const char *origOtherName = otherName; 
- 
-    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 
-        /* 
-         * skip the modern name if it is not requested _and_ 
-         * if the semicolon byte value is a character, not a token number 
-         */ 
-        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 
-            int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice; 
-            do { 
-                while(nameLength>0) { 
-                    --nameLength; 
-                    if(*name++==';') { 
-                        break; 
-                    } 
-                } 
-            } while(--fieldIndex>0); 
-        } else { 
-            /* 
-             * the semicolon byte value is a token number, therefore 
-             * only modern names are stored in unames.dat and there is no 
-             * such requested alternate name here 
-             */ 
-            nameLength=0; 
-        } 
-    } 
- 
-    /* compare each letter directly, and compare a token word per token */ 
-    while(nameLength>0) { 
-        --nameLength; 
-        c=*name++; 
- 
-        if(c>=tokenCount) { 
-            if(c!=';') { 
-                /* implicit letter */ 
-                if((char)c!=*otherName++) { 
-                    return FALSE; 
-                } 
-            } else { 
-                /* finished */ 
-                break; 
-            } 
-        } else { 
-            token=tokens[c]; 
-            if(token==(uint16_t)(-2)) { 
-                /* this is a lead byte for a double-byte token */ 
-                token=tokens[c<<8|*name++]; 
-                --nameLength; 
-            } 
-            if(token==(uint16_t)(-1)) { 
-                if(c!=';') { 
-                    /* explicit letter */ 
-                    if((char)c!=*otherName++) { 
-                        return FALSE; 
-                    } 
-                } else { 
-                    /* stop, but skip the semicolon if we are seeking 
-                       extended names and there was no 2.0 name but there 
-                       is a 1.0 name. */ 
-                    if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) { 
-                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) { 
-                            continue; 
-                        } 
-                    } 
-                    /* finished */ 
-                    break; 
-                } 
-            } else { 
-                /* write token word */ 
-                uint8_t *tokenString=tokenStrings+token; 
-                while((c=*tokenString++)!=0) { 
-                    if((char)c!=*otherName++) { 
-                        return FALSE; 
-                    } 
-                } 
-            } 
-        } 
-    } 
- 
-    /* complete match? */ 
-    return (UBool)(*otherName==0); 
-} 
- 
-static uint8_t getCharCat(UChar32 cp) { 
-    uint8_t cat; 
- 
-    if (U_IS_UNICODE_NONCHAR(cp)) { 
-        return U_NONCHARACTER_CODE_POINT; 
-    } 
- 
-    if ((cat = u_charType(cp)) == U_SURROGATE) { 
-        cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE; 
-    } 
- 
-    return cat; 
-} 
- 
-static const char *getCharCatName(UChar32 cp) { 
-    uint8_t cat = getCharCat(cp); 
- 
-    /* Return unknown if the table of names above is not up to 
-       date. */ 
- 
-    if (cat >= UPRV_LENGTHOF(charCatNames)) { 
-        return "unknown"; 
-    } else { 
-        return charCatNames[cat]; 
-    } 
-} 
- 
-static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) { 
-    const char *catname = getCharCatName(code); 
-    uint16_t length = 0; 
- 
-    UChar32 cp; 
-    int ndigits, i; 
-     
-    WRITE_CHAR(buffer, bufferLength, length, '<'); 
-    while (catname[length - 1]) { 
-        WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]); 
-    } 
-    WRITE_CHAR(buffer, bufferLength, length, '-'); 
-    for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4) 
-        ; 
-    if (ndigits < 4) 
-        ndigits = 4; 
-    for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) { 
-        uint8_t v = (uint8_t)(cp & 0xf); 
-        buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10); 
-    } 
-    buffer += ndigits; 
+
+#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
+
+/*
+ * Important: expandName() and compareName() are almost the same -
+ * apply fixes to both.
+ *
+ * UnicodeData.txt uses ';' as a field separator, so no
+ * field can contain ';' as part of its contents.
+ * In unames.dat, it is marked as token[';']==-1 only if the
+ * semicolon is used in the data file - which is iff we
+ * have Unicode 1.0 names or ISO comments or aliases.
+ * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
+ * although we know that it will never be part of a name.
+ */
+static uint16_t
+expandName(UCharNames *names,
+           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
+           char *buffer, uint16_t bufferLength) {
+    uint16_t *tokens=(uint16_t *)names+8;
+    uint16_t token, tokenCount=*tokens++, bufferPos=0;
+    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
+    uint8_t c;
+
+    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+        /*
+         * skip the modern name if it is not requested _and_
+         * if the semicolon byte value is a character, not a token number
+         */
+        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+            int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
+            do {
+                while(nameLength>0) {
+                    --nameLength;
+                    if(*name++==';') {
+                        break;
+                    }
+                }
+            } while(--fieldIndex>0);
+        } else {
+            /*
+             * the semicolon byte value is a token number, therefore
+             * only modern names are stored in unames.dat and there is no
+             * such requested alternate name here
+             */
+            nameLength=0;
+        }
+    }
+
+    /* write each letter directly, and write a token word per token */
+    while(nameLength>0) {
+        --nameLength;
+        c=*name++;
+
+        if(c>=tokenCount) {
+            if(c!=';') {
+                /* implicit letter */
+                WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+            } else {
+                /* finished */
+                break;
+            }
+        } else {
+            token=tokens[c];
+            if(token==(uint16_t)(-2)) {
+                /* this is a lead byte for a double-byte token */
+                token=tokens[c<<8|*name++];
+                --nameLength;
+            }
+            if(token==(uint16_t)(-1)) {
+                if(c!=';') {
+                    /* explicit letter */
+                    WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+                } else {
+                    /* stop, but skip the semicolon if we are seeking
+                       extended names and there was no 2.0 name but there
+                       is a 1.0 name. */
+                    if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
+                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+                            continue;
+                        }
+                    }
+                    /* finished */
+                    break;
+                }
+            } else {
+                /* write token word */
+                uint8_t *tokenString=tokenStrings+token;
+                while((c=*tokenString++)!=0) {
+                    WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+                }
+            }
+        }
+    }
+
+    /* zero-terminate */
+    if(bufferLength>0) {
+        *buffer=0;
+    }
+
+    return bufferPos;
+}
+
+/*
+ * compareName() is almost the same as expandName() except that it compares
+ * the currently expanded name to an input name.
+ * It returns the match/no match result as soon as possible.
+ */
+static UBool
+compareName(UCharNames *names,
+            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
+            const char *otherName) {
+    uint16_t *tokens=(uint16_t *)names+8;
+    uint16_t token, tokenCount=*tokens++;
+    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
+    uint8_t c;
+    const char *origOtherName = otherName;
+
+    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+        /*
+         * skip the modern name if it is not requested _and_
+         * if the semicolon byte value is a character, not a token number
+         */
+        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+            int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
+            do {
+                while(nameLength>0) {
+                    --nameLength;
+                    if(*name++==';') {
+                        break;
+                    }
+                }
+            } while(--fieldIndex>0);
+        } else {
+            /*
+             * the semicolon byte value is a token number, therefore
+             * only modern names are stored in unames.dat and there is no
+             * such requested alternate name here
+             */
+            nameLength=0;
+        }
+    }
+
+    /* compare each letter directly, and compare a token word per token */
+    while(nameLength>0) {
+        --nameLength;
+        c=*name++;
+
+        if(c>=tokenCount) {
+            if(c!=';') {
+                /* implicit letter */
+                if((char)c!=*otherName++) {
+                    return FALSE;
+                }
+            } else {
+                /* finished */
+                break;
+            }
+        } else {
+            token=tokens[c];
+            if(token==(uint16_t)(-2)) {
+                /* this is a lead byte for a double-byte token */
+                token=tokens[c<<8|*name++];
+                --nameLength;
+            }
+            if(token==(uint16_t)(-1)) {
+                if(c!=';') {
+                    /* explicit letter */
+                    if((char)c!=*otherName++) {
+                        return FALSE;
+                    }
+                } else {
+                    /* stop, but skip the semicolon if we are seeking
+                       extended names and there was no 2.0 name but there
+                       is a 1.0 name. */
+                    if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
+                        if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+                            continue;
+                        }
+                    }
+                    /* finished */
+                    break;
+                }
+            } else {
+                /* write token word */
+                uint8_t *tokenString=tokenStrings+token;
+                while((c=*tokenString++)!=0) {
+                    if((char)c!=*otherName++) {
+                        return FALSE;
+                    }
+                }
+            }
+        }
+    }
+
+    /* complete match? */
+    return (UBool)(*otherName==0);
+}
+
+static uint8_t getCharCat(UChar32 cp) {
+    uint8_t cat;
+
+    if (U_IS_UNICODE_NONCHAR(cp)) {
+        return U_NONCHARACTER_CODE_POINT;
+    }
+
+    if ((cat = u_charType(cp)) == U_SURROGATE) {
+        cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
+    }
+
+    return cat;
+}
+
+static const char *getCharCatName(UChar32 cp) {
+    uint8_t cat = getCharCat(cp);
+
+    /* Return unknown if the table of names above is not up to
+       date. */
+
+    if (cat >= UPRV_LENGTHOF(charCatNames)) {
+        return "unknown";
+    } else {
+        return charCatNames[cat];
+    }
+}
+
+static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
+    const char *catname = getCharCatName(code);
+    uint16_t length = 0;
+
+    UChar32 cp;
+    int ndigits, i;
+    
+    WRITE_CHAR(buffer, bufferLength, length, '<');
+    while (catname[length - 1]) {
+        WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
+    }
+    WRITE_CHAR(buffer, bufferLength, length, '-');
+    for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
+        ;
+    if (ndigits < 4)
+        ndigits = 4;
+    for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
+        uint8_t v = (uint8_t)(cp & 0xf);
+        buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
+    }
+    buffer += ndigits;
     length += static_cast<uint16_t>(ndigits);
-    WRITE_CHAR(buffer, bufferLength, length, '>'); 
- 
-    return length; 
-} 
- 
-/* 
- * getGroup() does a binary search for the group that contains the 
- * Unicode code point "code". 
- * The return value is always a valid Group* that may contain "code" 
- * or else is the highest group before "code". 
- * If the lowest group is after "code", then that one is returned. 
- */ 
-static const uint16_t * 
-getGroup(UCharNames *names, uint32_t code) { 
-    const uint16_t *groups=GET_GROUPS(names); 
-    uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT), 
-             start=0, 
-             limit=*groups++, 
-             number; 
- 
-    /* binary search for the group of names that contains the one for code */ 
-    while(start<limit-1) { 
-        number=(uint16_t)((start+limit)/2); 
-        if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) { 
-            limit=number; 
-        } else { 
-            start=number; 
-        } 
-    } 
- 
-    /* return this regardless of whether it is an exact match */ 
-    return groups+start*GROUP_LENGTH; 
-} 
- 
-/* 
- * expandGroupLengths() reads a block of compressed lengths of 32 strings and 
- * expands them into offsets and lengths for each string. 
- * Lengths are stored with a variable-width encoding in consecutive nibbles: 
- * If a nibble<0xc, then it is the length itself (0=empty string). 
- * If a nibble>=0xc, then it forms a length value with the following nibble. 
- * Calculation see below. 
- * The offsets and lengths arrays must be at least 33 (one more) long because 
- * there is no check here at the end if the last nibble is still used. 
- */ 
-static const uint8_t * 
-expandGroupLengths(const uint8_t *s, 
-                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) { 
-    /* read the lengths of the 32 strings in this group and get each string's offset */ 
-    uint16_t i=0, offset=0, length=0; 
-    uint8_t lengthByte; 
- 
-    /* all 32 lengths must be read to get the offset of the first group string */ 
-    while(i<LINES_PER_GROUP) { 
-        lengthByte=*s++; 
- 
-        /* read even nibble - MSBs of lengthByte */ 
-        if(length>=12) { 
-            /* double-nibble length spread across two bytes */ 
-            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12); 
-            lengthByte&=0xf; 
-        } else if((lengthByte /* &0xf0 */)>=0xc0) { 
-            /* double-nibble length spread across this one byte */ 
-            length=(uint16_t)((lengthByte&0x3f)+12); 
-        } else { 
-            /* single-nibble length in MSBs */ 
-            length=(uint16_t)(lengthByte>>4); 
-            lengthByte&=0xf; 
-        } 
- 
-        *offsets++=offset; 
-        *lengths++=length; 
- 
-        offset+=length; 
-        ++i; 
- 
-        /* read odd nibble - LSBs of lengthByte */ 
-        if((lengthByte&0xf0)==0) { 
-            /* this nibble was not consumed for a double-nibble length above */ 
-            length=lengthByte; 
-            if(length<12) { 
-                /* single-nibble length in LSBs */ 
-                *offsets++=offset; 
-                *lengths++=length; 
- 
-                offset+=length; 
-                ++i; 
-            } 
-        } else { 
-            length=0;   /* prevent double-nibble detection in the next iteration */ 
-        } 
-    } 
- 
-    /* now, s is at the first group string */ 
-    return s; 
-} 
- 
-static uint16_t 
-expandGroupName(UCharNames *names, const uint16_t *group, 
-                uint16_t lineNumber, UCharNameChoice nameChoice, 
-                char *buffer, uint16_t bufferLength) { 
-    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 
-    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 
-    s=expandGroupLengths(s, offsets, lengths); 
-    return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice, 
-                      buffer, bufferLength); 
-} 
- 
-static uint16_t 
-getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, 
-        char *buffer, uint16_t bufferLength) { 
-    const uint16_t *group=getGroup(names, code); 
-    if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) { 
-        return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice, 
-                               buffer, bufferLength); 
-    } else { 
-        /* group not found */ 
-        /* zero-terminate */ 
-        if(bufferLength>0) { 
-            *buffer=0; 
-        } 
-        return 0; 
-    } 
-} 
- 
-/* 
- * enumGroupNames() enumerates all the names in a 32-group 
- * and either calls the enumerator function or finds a given input name. 
- */ 
-static UBool 
-enumGroupNames(UCharNames *names, const uint16_t *group, 
-               UChar32 start, UChar32 end, 
-               UEnumCharNamesFn *fn, void *context, 
-               UCharNameChoice nameChoice) { 
-    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 
-    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group); 
- 
-    s=expandGroupLengths(s, offsets, lengths); 
-    if(fn!=DO_FIND_NAME) { 
-        char buffer[200]; 
-        uint16_t length; 
- 
-        while(start<=end) { 
-            length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer)); 
-            if (!length && nameChoice == U_EXTENDED_CHAR_NAME) { 
-                buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 
-            } 
-            /* here, we assume that the buffer is large enough */ 
-            if(length>0) { 
-                if(!fn(context, start, nameChoice, buffer, length)) { 
-                    return FALSE; 
-                } 
-            } 
-            ++start; 
-        } 
-    } else { 
-        const char *otherName=((FindName *)context)->otherName; 
-        while(start<=end) { 
-            if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) { 
-                ((FindName *)context)->code=start; 
-                return FALSE; 
-            } 
-            ++start; 
-        } 
-    } 
-    return TRUE; 
-} 
- 
-/* 
- * enumExtNames enumerate extended names. 
- * It only needs to do it if it is called with a real function and not 
- * with the dummy DO_FIND_NAME, because u_charFromName() does a check 
- * for extended names by itself. 
- */  
-static UBool 
-enumExtNames(UChar32 start, UChar32 end, 
-             UEnumCharNamesFn *fn, void *context) 
-{ 
-    if(fn!=DO_FIND_NAME) { 
-        char buffer[200]; 
-        uint16_t length; 
-         
-        while(start<=end) { 
-            buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0; 
-            /* here, we assume that the buffer is large enough */ 
-            if(length>0) { 
-                if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) { 
-                    return FALSE; 
-                } 
-            } 
-            ++start; 
-        } 
-    } 
- 
-    return TRUE; 
-} 
- 
-static UBool 
-enumNames(UCharNames *names, 
-          UChar32 start, UChar32 limit, 
-          UEnumCharNamesFn *fn, void *context, 
-          UCharNameChoice nameChoice) { 
-    uint16_t startGroupMSB, endGroupMSB, groupCount; 
-    const uint16_t *group, *groupLimit; 
- 
-    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT); 
-    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT); 
- 
-    /* find the group that contains start, or the highest before it */ 
-    group=getGroup(names, start); 
- 
-    if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) { 
-        /* enumerate synthetic names between start and the group start */ 
-        UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT); 
-        if(extLimit>limit) { 
-            extLimit=limit; 
-        } 
-        if(!enumExtNames(start, extLimit-1, fn, context)) { 
-            return FALSE; 
-        } 
-        start=extLimit; 
-    } 
- 
-    if(startGroupMSB==endGroupMSB) { 
-        if(startGroupMSB==group[GROUP_MSB]) { 
-            /* if start and limit-1 are in the same group, then enumerate only in that one */ 
-            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice); 
-        } 
-    } else { 
-        const uint16_t *groups=GET_GROUPS(names); 
-        groupCount=*groups++; 
-        groupLimit=groups+groupCount*GROUP_LENGTH; 
- 
-        if(startGroupMSB==group[GROUP_MSB]) { 
-            /* enumerate characters in the partial start group */ 
-            if((start&GROUP_MASK)!=0) { 
-                if(!enumGroupNames(names, group, 
-                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1, 
-                                   fn, context, nameChoice)) { 
-                    return FALSE; 
-                } 
-                group=NEXT_GROUP(group); /* continue with the next group */ 
-            } 
-        } else if(startGroupMSB>group[GROUP_MSB]) { 
-            /* make sure that we start enumerating with the first group after start */ 
-            const uint16_t *nextGroup=NEXT_GROUP(group); 
-            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) { 
-                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 
-                if (end > limit) { 
-                    end = limit; 
-                } 
-                if (!enumExtNames(start, end - 1, fn, context)) { 
-                    return FALSE; 
-                } 
-            } 
-            group=nextGroup; 
-        } 
- 
-        /* enumerate entire groups between the start- and end-groups */ 
-        while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) { 
-            const uint16_t *nextGroup; 
-            start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT; 
-            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) { 
-                return FALSE; 
-            } 
-            nextGroup=NEXT_GROUP(group); 
-            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) { 
-                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT; 
-                if (end > limit) { 
-                    end = limit; 
-                } 
-                if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) { 
-                    return FALSE; 
-                } 
-            } 
-            group=nextGroup; 
-        } 
- 
-        /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */ 
-        if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) { 
-            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice); 
-        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) { 
-            UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT; 
-            if (next > start) { 
-                start = next; 
-            } 
-        } else { 
-            return TRUE; 
-        } 
-    } 
- 
-    /* we have not found a group, which means everything is made of 
-       extended names. */ 
-    if (nameChoice == U_EXTENDED_CHAR_NAME) { 
-        if (limit > UCHAR_MAX_VALUE + 1) { 
-            limit = UCHAR_MAX_VALUE + 1; 
-        } 
-        return enumExtNames(start, limit - 1, fn, context); 
-    } 
-     
-    return TRUE; 
-} 
- 
-static uint16_t 
-writeFactorSuffix(const uint16_t *factors, uint16_t count, 
-                  const char *s, /* suffix elements */ 
-                  uint32_t code, 
-                  uint16_t indexes[8], /* output fields from here */ 
-                  const char *elementBases[8], const char *elements[8], 
-                  char *buffer, uint16_t bufferLength) { 
-    uint16_t i, factor, bufferPos=0; 
-    char c; 
- 
-    /* write elements according to the factors */ 
- 
-    /* 
-     * the factorized elements are determined by modulo arithmetic 
-     * with the factors of this algorithm 
-     * 
-     * note that for fewer operations, count is decremented here 
-     */ 
-    --count; 
-    for(i=count; i>0; --i) { 
-        factor=factors[i]; 
-        indexes[i]=(uint16_t)(code%factor); 
-        code/=factor; 
-    } 
-    /* 
-     * we don't need to calculate the last modulus because start<=code<=end 
-     * guarantees here that code<=factors[0] 
-     */ 
-    indexes[0]=(uint16_t)code; 
- 
-    /* write each element */ 
-    for(;;) { 
-        if(elementBases!=NULL) { 
-            *elementBases++=s; 
-        } 
- 
-        /* skip indexes[i] strings */ 
-        factor=indexes[i]; 
-        while(factor>0) { 
-            while(*s++!=0) {} 
-            --factor; 
-        } 
-        if(elements!=NULL) { 
-            *elements++=s; 
-        } 
- 
-        /* write element */ 
-        while((c=*s++)!=0) { 
-            WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-        } 
- 
-        /* we do not need to perform the rest of this loop for i==count - break here */ 
-        if(i>=count) { 
-            break; 
-        } 
- 
-        /* skip the rest of the strings for this factors[i] */ 
-        factor=(uint16_t)(factors[i]-indexes[i]-1); 
-        while(factor>0) { 
-            while(*s++!=0) {} 
-            --factor; 
-        } 
- 
-        ++i; 
-    } 
- 
-    /* zero-terminate */ 
-    if(bufferLength>0) { 
-        *buffer=0; 
-    } 
- 
-    return bufferPos; 
-} 
- 
-/* 
- * Important: 
- * Parts of findAlgName() are almost the same as some of getAlgName(). 
- * Fixes must be applied to both. 
- */ 
-static uint16_t 
-getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, 
-        char *buffer, uint16_t bufferLength) { 
-    uint16_t bufferPos=0; 
- 
-    /* Only the normative character name can be algorithmic. */ 
-    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 
-        /* zero-terminate */ 
-        if(bufferLength>0) { 
-            *buffer=0; 
-        } 
-        return 0; 
-    } 
- 
-    switch(range->type) { 
-    case 0: { 
-        /* name = prefix hex-digits */ 
-        const char *s=(const char *)(range+1); 
-        char c; 
- 
-        uint16_t i, count; 
- 
-        /* copy prefix */ 
-        while((c=*s++)!=0) { 
-            WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-        } 
- 
-        /* write hexadecimal code point value */ 
-        count=range->variant; 
- 
-        /* zero-terminate */ 
-        if(count<bufferLength) { 
-            buffer[count]=0; 
-        } 
- 
-        for(i=count; i>0;) { 
-            if(--i<bufferLength) { 
-                c=(char)(code&0xf); 
-                if(c<10) { 
-                    c+='0'; 
-                } else { 
-                    c+='A'-10; 
-                } 
-                buffer[i]=c; 
-            } 
-            code>>=4; 
-        } 
- 
-        bufferPos+=count; 
-        break; 
-    } 
-    case 1: { 
-        /* name = prefix factorized-elements */ 
-        uint16_t indexes[8]; 
-        const uint16_t *factors=(const uint16_t *)(range+1); 
-        uint16_t count=range->variant; 
-        const char *s=(const char *)(factors+count); 
-        char c; 
- 
-        /* copy prefix */ 
-        while((c=*s++)!=0) { 
-            WRITE_CHAR(buffer, bufferLength, bufferPos, c); 
-        } 
- 
-        bufferPos+=writeFactorSuffix(factors, count, 
-                                     s, code-range->start, indexes, NULL, NULL, buffer, bufferLength); 
-        break; 
-    } 
-    default: 
-        /* undefined type */ 
-        /* zero-terminate */ 
-        if(bufferLength>0) { 
-            *buffer=0; 
-        } 
-        break; 
-    } 
- 
-    return bufferPos; 
-} 
- 
-/* 
- * Important: enumAlgNames() and findAlgName() are almost the same. 
- * Any fix must be applied to both. 
- */ 
-static UBool 
-enumAlgNames(AlgorithmicRange *range, 
-             UChar32 start, UChar32 limit, 
-             UEnumCharNamesFn *fn, void *context, 
-             UCharNameChoice nameChoice) { 
-    char buffer[200]; 
-    uint16_t length; 
- 
-    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 
-        return TRUE; 
-    } 
- 
-    switch(range->type) { 
-    case 0: { 
-        char *s, *end; 
-        char c; 
- 
-        /* get the full name of the start character */ 
-        length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer)); 
-        if(length<=0) { 
-            return TRUE; 
-        } 
- 
-        /* call the enumerator function with this first character */ 
-        if(!fn(context, start, nameChoice, buffer, length)) { 
-            return FALSE; 
-        } 
- 
-        /* go to the end of the name; all these names have the same length */ 
-        end=buffer; 
-        while(*end!=0) { 
-            ++end; 
-        } 
- 
-        /* enumerate the rest of the names */ 
-        while(++start<limit) { 
-            /* increment the hexadecimal number on a character-basis */ 
-            s=end; 
-            for (;;) { 
-                c=*--s; 
-                if(('0'<=c && c<'9') || ('A'<=c && c<'F')) { 
-                    *s=(char)(c+1); 
-                    break; 
-                } else if(c=='9') { 
-                    *s='A'; 
-                    break; 
-                } else if(c=='F') { 
-                    *s='0'; 
-                } 
-            } 
- 
-            if(!fn(context, start, nameChoice, buffer, length)) { 
-                return FALSE; 
-            } 
-        } 
-        break; 
-    } 
-    case 1: { 
-        uint16_t indexes[8]; 
-        const char *elementBases[8], *elements[8]; 
-        const uint16_t *factors=(const uint16_t *)(range+1); 
-        uint16_t count=range->variant; 
-        const char *s=(const char *)(factors+count); 
-        char *suffix, *t; 
-        uint16_t prefixLength, i, idx; 
- 
-        char c; 
- 
-        /* name = prefix factorized-elements */ 
- 
-        /* copy prefix */ 
-        suffix=buffer; 
-        prefixLength=0; 
-        while((c=*s++)!=0) { 
-            *suffix++=c; 
-            ++prefixLength; 
-        } 
- 
-        /* append the suffix of the start character */ 
-        length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count, 
-                                              s, (uint32_t)start-range->start, 
-                                              indexes, elementBases, elements, 
-                                              suffix, (uint16_t)(sizeof(buffer)-prefixLength))); 
- 
-        /* call the enumerator function with this first character */ 
-        if(!fn(context, start, nameChoice, buffer, length)) { 
-            return FALSE; 
-        } 
- 
-        /* enumerate the rest of the names */ 
-        while(++start<limit) { 
-            /* increment the indexes in lexical order bound by the factors */ 
-            i=count; 
-            for (;;) { 
-                idx=(uint16_t)(indexes[--i]+1); 
-                if(idx<factors[i]) { 
-                    /* skip one index and its element string */ 
-                    indexes[i]=idx; 
-                    s=elements[i]; 
-                    while(*s++!=0) { 
-                    } 
-                    elements[i]=s; 
-                    break; 
-                } else { 
-                    /* reset this index to 0 and its element string to the first one */ 
-                    indexes[i]=0; 
-                    elements[i]=elementBases[i]; 
-                } 
-            } 
- 
-            /* to make matters a little easier, just append all elements to the suffix */ 
-            t=suffix; 
-            length=prefixLength; 
-            for(i=0; i<count; ++i) { 
-                s=elements[i]; 
-                while((c=*s++)!=0) { 
-                    *t++=c; 
-                    ++length; 
-                } 
-            } 
-            /* zero-terminate */ 
-            *t=0; 
- 
-            if(!fn(context, start, nameChoice, buffer, length)) { 
-                return FALSE; 
-            } 
-        } 
-        break; 
-    } 
-    default: 
-        /* undefined type */ 
-        break; 
-    } 
- 
-    return TRUE; 
-} 
- 
-/* 
- * findAlgName() is almost the same as enumAlgNames() except that it 
- * returns the code point for a name if it fits into the range. 
- * It returns 0xffff otherwise. 
+    WRITE_CHAR(buffer, bufferLength, length, '>');
+
+    return length;
+}
+
+/*
+ * getGroup() does a binary search for the group that contains the
+ * Unicode code point "code".
+ * The return value is always a valid Group* that may contain "code"
+ * or else is the highest group before "code".
+ * If the lowest group is after "code", then that one is returned.
+ */
+static const uint16_t *
+getGroup(UCharNames *names, uint32_t code) {
+    const uint16_t *groups=GET_GROUPS(names);
+    uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
+             start=0,
+             limit=*groups++,
+             number;
+
+    /* binary search for the group of names that contains the one for code */
+    while(start<limit-1) {
+        number=(uint16_t)((start+limit)/2);
+        if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
+            limit=number;
+        } else {
+            start=number;
+        }
+    }
+
+    /* return this regardless of whether it is an exact match */
+    return groups+start*GROUP_LENGTH;
+}
+
+/*
+ * expandGroupLengths() reads a block of compressed lengths of 32 strings and
+ * expands them into offsets and lengths for each string.
+ * Lengths are stored with a variable-width encoding in consecutive nibbles:
+ * If a nibble<0xc, then it is the length itself (0=empty string).
+ * If a nibble>=0xc, then it forms a length value with the following nibble.
+ * Calculation see below.
+ * The offsets and lengths arrays must be at least 33 (one more) long because
+ * there is no check here at the end if the last nibble is still used.
+ */
+static const uint8_t *
+expandGroupLengths(const uint8_t *s,
+                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
+    /* read the lengths of the 32 strings in this group and get each string's offset */
+    uint16_t i=0, offset=0, length=0;
+    uint8_t lengthByte;
+
+    /* all 32 lengths must be read to get the offset of the first group string */
+    while(i<LINES_PER_GROUP) {
+        lengthByte=*s++;
+
+        /* read even nibble - MSBs of lengthByte */
+        if(length>=12) {
+            /* double-nibble length spread across two bytes */
+            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
+            lengthByte&=0xf;
+        } else if((lengthByte /* &0xf0 */)>=0xc0) {
+            /* double-nibble length spread across this one byte */
+            length=(uint16_t)((lengthByte&0x3f)+12);
+        } else {
+            /* single-nibble length in MSBs */
+            length=(uint16_t)(lengthByte>>4);
+            lengthByte&=0xf;
+        }
+
+        *offsets++=offset;
+        *lengths++=length;
+
+        offset+=length;
+        ++i;
+
+        /* read odd nibble - LSBs of lengthByte */
+        if((lengthByte&0xf0)==0) {
+            /* this nibble was not consumed for a double-nibble length above */
+            length=lengthByte;
+            if(length<12) {
+                /* single-nibble length in LSBs */
+                *offsets++=offset;
+                *lengths++=length;
+
+                offset+=length;
+                ++i;
+            }
+        } else {
+            length=0;   /* prevent double-nibble detection in the next iteration */
+        }
+    }
+
+    /* now, s is at the first group string */
+    return s;
+}
+
+static uint16_t
+expandGroupName(UCharNames *names, const uint16_t *group,
+                uint16_t lineNumber, UCharNameChoice nameChoice,
+                char *buffer, uint16_t bufferLength) {
+    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
+    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
+    s=expandGroupLengths(s, offsets, lengths);
+    return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
+                      buffer, bufferLength);
+}
+
+static uint16_t
+getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
+        char *buffer, uint16_t bufferLength) {
+    const uint16_t *group=getGroup(names, code);
+    if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
+        return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
+                               buffer, bufferLength);
+    } else {
+        /* group not found */
+        /* zero-terminate */
+        if(bufferLength>0) {
+            *buffer=0;
+        }
+        return 0;
+    }
+}
+
+/*
+ * enumGroupNames() enumerates all the names in a 32-group
+ * and either calls the enumerator function or finds a given input name.
+ */
+static UBool
+enumGroupNames(UCharNames *names, const uint16_t *group,
+               UChar32 start, UChar32 end,
+               UEnumCharNamesFn *fn, void *context,
+               UCharNameChoice nameChoice) {
+    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
+    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
+
+    s=expandGroupLengths(s, offsets, lengths);
+    if(fn!=DO_FIND_NAME) {
+        char buffer[200];
+        uint16_t length;
+
+        while(start<=end) {
+            length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
+            if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
+                buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
+            }
+            /* here, we assume that the buffer is large enough */
+            if(length>0) {
+                if(!fn(context, start, nameChoice, buffer, length)) {
+                    return FALSE;
+                }
+            }
+            ++start;
+        }
+    } else {
+        const char *otherName=((FindName *)context)->otherName;
+        while(start<=end) {
+            if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
+                ((FindName *)context)->code=start;
+                return FALSE;
+            }
+            ++start;
+        }
+    }
+    return TRUE;
+}
+
+/*
+ * enumExtNames enumerate extended names.
+ * It only needs to do it if it is called with a real function and not
+ * with the dummy DO_FIND_NAME, because u_charFromName() does a check
+ * for extended names by itself.
  */ 
-static UChar32 
-findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) { 
-    UChar32 code; 
- 
-    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) { 
-        return 0xffff; 
-    } 
- 
-    switch(range->type) { 
-    case 0: { 
-        /* name = prefix hex-digits */ 
-        const char *s=(const char *)(range+1); 
-        char c; 
- 
-        uint16_t i, count; 
- 
-        /* compare prefix */ 
-        while((c=*s++)!=0) { 
-            if((char)c!=*otherName++) { 
-                return 0xffff; 
-            } 
-        } 
- 
-        /* read hexadecimal code point value */ 
-        count=range->variant; 
-        code=0; 
-        for(i=0; i<count; ++i) { 
-            c=*otherName++; 
-            if('0'<=c && c<='9') { 
-                code=(code<<4)|(c-'0'); 
-            } else if('A'<=c && c<='F') { 
-                code=(code<<4)|(c-'A'+10); 
-            } else { 
-                return 0xffff; 
-            } 
-        } 
- 
-        /* does it fit into the range? */ 
-        if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) { 
-            return code; 
-        } 
-        break; 
-    } 
-    case 1: { 
-        char buffer[64]; 
-        uint16_t indexes[8]; 
-        const char *elementBases[8], *elements[8]; 
-        const uint16_t *factors=(const uint16_t *)(range+1); 
-        uint16_t count=range->variant; 
-        const char *s=(const char *)(factors+count), *t; 
-        UChar32 start, limit; 
-        uint16_t i, idx; 
- 
-        char c; 
- 
-        /* name = prefix factorized-elements */ 
- 
-        /* compare prefix */ 
-        while((c=*s++)!=0) { 
-            if((char)c!=*otherName++) { 
-                return 0xffff; 
-            } 
-        } 
- 
-        start=(UChar32)range->start; 
-        limit=(UChar32)(range->end+1); 
- 
-        /* initialize the suffix elements for enumeration; indexes should all be set to 0 */ 
-        writeFactorSuffix(factors, count, s, 0, 
-                          indexes, elementBases, elements, buffer, sizeof(buffer)); 
- 
-        /* compare the first suffix */ 
-        if(0==uprv_strcmp(otherName, buffer)) { 
-            return start; 
-        } 
- 
-        /* enumerate and compare the rest of the suffixes */ 
-        while(++start<limit) { 
-            /* increment the indexes in lexical order bound by the factors */ 
-            i=count; 
-            for (;;) { 
-                idx=(uint16_t)(indexes[--i]+1); 
-                if(idx<factors[i]) { 
-                    /* skip one index and its element string */ 
-                    indexes[i]=idx; 
-                    s=elements[i]; 
-                    while(*s++!=0) {} 
-                    elements[i]=s; 
-                    break; 
-                } else { 
-                    /* reset this index to 0 and its element string to the first one */ 
-                    indexes[i]=0; 
-                    elements[i]=elementBases[i]; 
-                } 
-            } 
- 
-            /* to make matters a little easier, just compare all elements of the suffix */ 
-            t=otherName; 
-            for(i=0; i<count; ++i) { 
-                s=elements[i]; 
-                while((c=*s++)!=0) { 
-                    if(c!=*t++) { 
-                        s=""; /* does not match */ 
-                        i=99; 
-                    } 
-                } 
-            } 
-            if(i<99 && *t==0) { 
-                return start; 
-            } 
-        } 
-        break; 
-    } 
-    default: 
-        /* undefined type */ 
-        break; 
-    } 
- 
-    return 0xffff; 
-} 
- 
-/* sets of name characters, maximum name lengths ---------------------------- */ 
- 
-#define SET_ADD(set, c) ((set)[(uint8_t)c>>5]|=((uint32_t)1<<((uint8_t)c&0x1f))) 
-#define SET_CONTAINS(set, c) (((set)[(uint8_t)c>>5]&((uint32_t)1<<((uint8_t)c&0x1f)))!=0) 
- 
-static int32_t 
-calcStringSetLength(uint32_t set[8], const char *s) { 
-    int32_t length=0; 
-    char c; 
- 
-    while((c=*s++)!=0) { 
-        SET_ADD(set, c); 
-        ++length; 
-    } 
-    return length; 
-} 
- 
-static int32_t 
-calcAlgNameSetsLengths(int32_t maxNameLength) { 
-    AlgorithmicRange *range; 
-    uint32_t *p; 
-    uint32_t rangeCount; 
-    int32_t length; 
- 
-    /* enumerate algorithmic ranges */ 
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 
-    rangeCount=*p; 
-    range=(AlgorithmicRange *)(p+1); 
-    while(rangeCount>0) { 
-        switch(range->type) { 
-        case 0: 
-            /* name = prefix + (range->variant times) hex-digits */ 
-            /* prefix */ 
-            length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant; 
-            if(length>maxNameLength) { 
-                maxNameLength=length; 
-            } 
-            break; 
-        case 1: { 
-            /* name = prefix factorized-elements */ 
-            const uint16_t *factors=(const uint16_t *)(range+1); 
-            const char *s; 
-            int32_t i, count=range->variant, factor, factorLength, maxFactorLength; 
- 
-            /* prefix length */ 
-            s=(const char *)(factors+count); 
-            length=calcStringSetLength(gNameSet, s); 
-            s+=length+1; /* start of factor suffixes */ 
- 
-            /* get the set and maximum factor suffix length for each factor */ 
-            for(i=0; i<count; ++i) { 
-                maxFactorLength=0; 
-                for(factor=factors[i]; factor>0; --factor) { 
-                    factorLength=calcStringSetLength(gNameSet, s); 
-                    s+=factorLength+1; 
-                    if(factorLength>maxFactorLength) { 
-                        maxFactorLength=factorLength; 
-                    } 
-                } 
-                length+=maxFactorLength; 
-            } 
- 
-            if(length>maxNameLength) { 
-                maxNameLength=length; 
-            } 
-            break; 
-        } 
-        default: 
-            /* unknown type */ 
-            break; 
-        } 
- 
-        range=(AlgorithmicRange *)((uint8_t *)range+range->size); 
-        --rangeCount; 
-    } 
-    return maxNameLength; 
-} 
- 
-static int32_t 
-calcExtNameSetsLengths(int32_t maxNameLength) { 
-    int32_t i, length; 
- 
-    for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) { 
-        /* 
-         * for each category, count the length of the category name 
-         * plus 9= 
-         * 2 for <> 
-         * 1 for - 
-         * 6 for most hex digits per code point 
-         */ 
-        length=9+calcStringSetLength(gNameSet, charCatNames[i]); 
-        if(length>maxNameLength) { 
-            maxNameLength=length; 
-        } 
-    } 
-    return maxNameLength; 
-} 
- 
-static int32_t 
-calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, 
-                  uint32_t set[8], 
-                  const uint8_t **pLine, const uint8_t *lineLimit) { 
-    const uint8_t *line=*pLine; 
-    int32_t length=0, tokenLength; 
-    uint16_t c, token; 
- 
-    while(line!=lineLimit && (c=*line++)!=(uint8_t)';') { 
-        if(c>=tokenCount) { 
-            /* implicit letter */ 
-            SET_ADD(set, c); 
-            ++length; 
-        } else { 
-            token=tokens[c]; 
-            if(token==(uint16_t)(-2)) { 
-                /* this is a lead byte for a double-byte token */ 
-                c=c<<8|*line++; 
-                token=tokens[c]; 
-            } 
-            if(token==(uint16_t)(-1)) { 
-                /* explicit letter */ 
-                SET_ADD(set, c); 
-                ++length; 
-            } else { 
-                /* count token word */ 
-                if(tokenLengths!=NULL) { 
-                    /* use cached token length */ 
-                    tokenLength=tokenLengths[c]; 
-                    if(tokenLength==0) { 
-                        tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 
-                        tokenLengths[c]=(int8_t)tokenLength; 
-                    } 
-                } else { 
-                    tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token); 
-                } 
-                length+=tokenLength; 
-            } 
-        } 
-    } 
- 
-    *pLine=line; 
-    return length; 
-} 
- 
-static void 
-calcGroupNameSetsLengths(int32_t maxNameLength) { 
-    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2]; 
- 
-    uint16_t *tokens=(uint16_t *)uCharNames+8; 
-    uint16_t tokenCount=*tokens++; 
-    uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset; 
- 
-    int8_t *tokenLengths; 
- 
-    const uint16_t *group; 
-    const uint8_t *s, *line, *lineLimit; 
- 
-    int32_t groupCount, lineNumber, length; 
- 
-    tokenLengths=(int8_t *)uprv_malloc(tokenCount); 
-    if(tokenLengths!=NULL) { 
-        uprv_memset(tokenLengths, 0, tokenCount); 
-    } 
- 
-    group=GET_GROUPS(uCharNames); 
-    groupCount=*group++; 
- 
-    /* enumerate all groups */ 
-    while(groupCount>0) { 
-        s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group); 
-        s=expandGroupLengths(s, offsets, lengths); 
- 
-        /* enumerate all lines in each group */ 
-        for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) { 
-            line=s+offsets[lineNumber]; 
-            length=lengths[lineNumber]; 
-            if(length==0) { 
-                continue; 
-            } 
- 
-            lineLimit=line+length; 
- 
-            /* read regular name */ 
-            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 
-            if(length>maxNameLength) { 
-                maxNameLength=length; 
-            } 
-            if(line==lineLimit) { 
-                continue; 
-            } 
- 
-            /* read Unicode 1.0 name */ 
-            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit); 
-            if(length>maxNameLength) { 
-                maxNameLength=length; 
-            } 
-            if(line==lineLimit) { 
-                continue; 
-            } 
- 
-            /* read ISO comment */ 
-            /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/ 
-        } 
- 
-        group=NEXT_GROUP(group); 
-        --groupCount; 
-    } 
- 
-    if(tokenLengths!=NULL) { 
-        uprv_free(tokenLengths); 
-    } 
- 
-    /* set gMax... - name length last for threading */ 
-    gMaxNameLength=maxNameLength; 
-} 
- 
-static UBool 
-calcNameSetsLengths(UErrorCode *pErrorCode) { 
-    static const char extChars[]="0123456789ABCDEF<>-"; 
-    int32_t i, maxNameLength; 
- 
-    if(gMaxNameLength!=0) { 
-        return TRUE; 
-    } 
- 
-    if(!isDataLoaded(pErrorCode)) { 
-        return FALSE; 
-    } 
- 
-    /* set hex digits, used in various names, and <>-, used in extended names */ 
-    for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) { 
-        SET_ADD(gNameSet, extChars[i]); 
-    } 
- 
-    /* set sets and lengths from algorithmic names */ 
-    maxNameLength=calcAlgNameSetsLengths(0); 
- 
-    /* set sets and lengths from extended names */ 
-    maxNameLength=calcExtNameSetsLengths(maxNameLength); 
- 
-    /* set sets and lengths from group names, set global maximum values */ 
-    calcGroupNameSetsLengths(maxNameLength); 
- 
-    return TRUE; 
-} 
- 
-U_NAMESPACE_END 
- 
-/* public API --------------------------------------------------------------- */ 
- 
-U_NAMESPACE_USE 
- 
-U_CAPI int32_t U_EXPORT2 
-u_charName(UChar32 code, UCharNameChoice nameChoice, 
-           char *buffer, int32_t bufferLength, 
-           UErrorCode *pErrorCode) { 
-     AlgorithmicRange *algRange; 
-    uint32_t *p; 
-    uint32_t i; 
-    int32_t length; 
- 
-    /* check the argument values */ 
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 
-        return 0; 
-    } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || 
-              bufferLength<0 || (bufferLength>0 && buffer==NULL) 
-    ) { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-        return 0; 
-    } 
- 
-    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { 
-        return u_terminateChars(buffer, bufferLength, 0, pErrorCode); 
-    } 
- 
-    length=0; 
- 
-    /* try algorithmic names first */ 
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 
-    i=*p; 
-    algRange=(AlgorithmicRange *)(p+1); 
-    while(i>0) { 
-        if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) { 
-            length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 
-            break; 
-        } 
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 
-        --i; 
-    } 
- 
-    if(i==0) { 
-        if (nameChoice == U_EXTENDED_CHAR_NAME) { 
-            length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength); 
-            if (!length) { 
-                /* extended character name */ 
-                length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength); 
-            } 
-        } else { 
-            /* normal character name */ 
-            length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength); 
-        } 
-    } 
- 
-    return u_terminateChars(buffer, bufferLength, length, pErrorCode); 
-} 
- 
-U_CAPI int32_t U_EXPORT2 
-u_getISOComment(UChar32 /*c*/, 
-                char *dest, int32_t destCapacity, 
-                UErrorCode *pErrorCode) { 
-    /* check the argument values */ 
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 
-        return 0; 
-    } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-        return 0; 
-    } 
- 
-    return u_terminateChars(dest, destCapacity, 0, pErrorCode); 
-} 
- 
-U_CAPI UChar32 U_EXPORT2 
-u_charFromName(UCharNameChoice nameChoice, 
-               const char *name, 
-               UErrorCode *pErrorCode) { 
+/*
+ * Enumerate the synthetic "extended" names of the code points in the
+ * inclusive range [start, end], passing each non-empty name to fn.
+ *
+ * Nothing is enumerated when fn is DO_FIND_NAME.
+ * Returns FALSE as soon as fn returns FALSE, and TRUE otherwise.
+ */
+static UBool
+enumExtNames(UChar32 start, UChar32 end,
+             UEnumCharNamesFn *fn, void *context)
+{
+    char nameBuffer[200];
+    uint16_t nameLength;
+    UChar32 c;
+
+    if(fn==DO_FIND_NAME) {
+        return TRUE;
+    }
+
+    for(c=start; c<=end; ++c) {
+        /* here, we assume that the buffer is large enough */
+        nameLength=getExtName(c, nameBuffer, sizeof(nameBuffer));
+        nameBuffer[nameLength]=0;
+        if(nameLength==0) {
+            continue;
+        }
+        if(!fn(context, c, U_EXTENDED_CHAR_NAME, nameBuffer, nameLength)) {
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+/*
+ * Enumerate the names of the code points in [start, limit) using the
+ * group data in names, via enumGroupNames().
+ * For U_EXTENDED_CHAR_NAME, stretches of code points that are not covered
+ * by any group are additionally passed to enumExtNames().
+ *
+ * Returns FALSE as soon as one of the enumeration helpers returns FALSE,
+ * and otherwise TRUE or the result of the last helper call.
+ */
+static UBool
+enumNames(UCharNames *names,
+          UChar32 start, UChar32 limit,
+          UEnumCharNamesFn *fn, void *context,
+          UCharNameChoice nameChoice) {
+    uint16_t startGroupMSB, endGroupMSB, groupCount;
+    const uint16_t *group, *groupLimit;
+
+    /* group numbers of the first and the last (limit-1) code point */
+    startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
+    endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
+
+    /* find the group that contains start, or the highest before it */
+    group=getGroup(names, start);
+
+    if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
+        /* enumerate synthetic names between start and the group start */
+        UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
+        if(extLimit>limit) {
+            extLimit=limit;
+        }
+        if(!enumExtNames(start, extLimit-1, fn, context)) {
+            return FALSE;
+        }
+        start=extLimit;
+    }
+
+    if(startGroupMSB==endGroupMSB) {
+        if(startGroupMSB==group[GROUP_MSB]) {
+            /* if start and limit-1 are in the same group, then enumerate only in that one */
+            return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
+        }
+        /* otherwise fall through to the extended-name handling at the end */
+    } else {
+        const uint16_t *groups=GET_GROUPS(names);
+        groupCount=*groups++;
+        groupLimit=groups+groupCount*GROUP_LENGTH;
+
+        if(startGroupMSB==group[GROUP_MSB]) {
+            /* enumerate characters in the partial start group */
+            if((start&GROUP_MASK)!=0) {
+                if(!enumGroupNames(names, group,
+                                   start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
+                                   fn, context, nameChoice)) {
+                    return FALSE;
+                }
+                group=NEXT_GROUP(group); /* continue with the next group */
+            }
+        } else if(startGroupMSB>group[GROUP_MSB]) {
+            /* make sure that we start enumerating with the first group after start */
+            const uint16_t *nextGroup=NEXT_GROUP(group);
+            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
+                /* extended names from start up to the next group (or limit) */
+                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
+                if (end > limit) {
+                    end = limit;
+                }
+                if (!enumExtNames(start, end - 1, fn, context)) {
+                    return FALSE;
+                }
+            }
+            group=nextGroup;
+        }
+
+        /* enumerate entire groups between the start- and end-groups */
+        while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
+            const uint16_t *nextGroup;
+            start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
+            if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
+                return FALSE;
+            }
+            nextGroup=NEXT_GROUP(group);
+            if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
+                /* extended names for the gap between this group and the next one */
+                UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
+                if (end > limit) {
+                    end = limit;
+                }
+                if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
+                    return FALSE;
+                }
+            }
+            group=nextGroup;
+        }
+
+        /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
+        if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
+            return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
+        } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
+            /*
+             * past the last group: continue with the extended-name
+             * enumeration below, starting no earlier than right after
+             * the last group
+             */
+            UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
+            if (next > start) {
+                start = next;
+            }
+        } else {
+            return TRUE;
+        }
+    }
+
+    /* we have not found a group, which means everything is made of
+       extended names. */
+    if (nameChoice == U_EXTENDED_CHAR_NAME) {
+        /* do not enumerate beyond the last code point */
+        if (limit > UCHAR_MAX_VALUE + 1) {
+            limit = UCHAR_MAX_VALUE + 1;
+        }
+        return enumExtNames(start, limit - 1, fn, context);
+    }
+    
+    return TRUE;
+}
+
+/*
+ * Decompose code into one index per factor (mixed-radix digits, with
+ * factors[count-1] as the least significant radix) and append, for each
+ * factor in order, the element string selected by that index to buffer.
+ *
+ * s points to the element strings: for each factor i, a run of factors[i]
+ * NUL-terminated strings.
+ * indexes[] receives the index computed for each factor.
+ * elementBases and elements may each be NULL; otherwise they receive, per
+ * factor, the start of that factor's run of strings and the selected string.
+ *
+ * Returns bufferPos as maintained by WRITE_CHAR.
+ * NOTE(review): WRITE_CHAR is defined outside this view; it presumably
+ * advances buffer, decrements bufferLength and increments bufferPos.
+ */
+static uint16_t
+writeFactorSuffix(const uint16_t *factors, uint16_t count,
+                  const char *s, /* suffix elements */
+                  uint32_t code,
+                  uint16_t indexes[8], /* output fields from here */
+                  const char *elementBases[8], const char *elements[8],
+                  char *buffer, uint16_t bufferLength) {
+    uint16_t i, factor, bufferPos=0;
+    char c;
+
+    /* write elements according to the factors */
+
+    /*
+     * the factorized elements are determined by modulo arithmetic
+     * with the factors of this algorithm
+     *
+     * note that for fewer operations, count is decremented here
+     */
+    --count;
+    for(i=count; i>0; --i) {
+        factor=factors[i];
+        indexes[i]=(uint16_t)(code%factor);
+        code/=factor;
+    }
+    /*
+     * we don't need to calculate the last modulus because start<=code<=end
+     * guarantees here that code<=factors[0]
+     */
+    indexes[0]=(uint16_t)code;
+
+    /* write each element */
+    /* i is 0 here, left over from the loop above; count is now the last index */
+    for(;;) {
+        if(elementBases!=NULL) {
+            *elementBases++=s;
+        }
+
+        /* skip indexes[i] strings */
+        factor=indexes[i];
+        while(factor>0) {
+            while(*s++!=0) {}
+            --factor;
+        }
+        if(elements!=NULL) {
+            *elements++=s;
+        }
+
+        /* write element */
+        while((c=*s++)!=0) {
+            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+        }
+
+        /* we do not need to perform the rest of this loop for i==count - break here */
+        if(i>=count) {
+            break;
+        }
+
+        /* skip the rest of the strings for this factors[i] */
+        factor=(uint16_t)(factors[i]-indexes[i]-1);
+        while(factor>0) {
+            while(*s++!=0) {}
+            --factor;
+        }
+
+        ++i;
+    }
+
+    /* zero-terminate */
+    if(bufferLength>0) {
+        *buffer=0;
+    }
+
+    return bufferPos;
+}
+
+/*
+ * Write the algorithmic name of code according to range into buffer.
+ * This function does not itself check that code lies within the range.
+ *
+ * Type 0 ranges: prefix followed by range->variant hexadecimal digits.
+ * Type 1 ranges: prefix followed by factorized elements, see writeFactorSuffix().
+ * For other range types, and for name choices other than
+ * U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME, only a terminating zero
+ * is written (if bufferLength>0) and 0 is returned.
+ *
+ * Returns bufferPos, the name length as counted by WRITE_CHAR plus the
+ * digit/suffix lengths added below.
+ *
+ * Important:
+ * Parts of findAlgName() are almost the same as some of getAlgName().
+ * Fixes must be applied to both.
+ */
+static uint16_t
+getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
+        char *buffer, uint16_t bufferLength) {
+    uint16_t bufferPos=0;
+
+    /* Only the normative character name can be algorithmic. */
+    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+        /* zero-terminate */
+        if(bufferLength>0) {
+            *buffer=0;
+        }
+        return 0;
+    }
+
+    switch(range->type) {
+    case 0: {
+        /* name = prefix hex-digits */
+        const char *s=(const char *)(range+1);
+        char c;
+
+        uint16_t i, count;
+
+        /* copy prefix */
+        while((c=*s++)!=0) {
+            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+        }
+
+        /* write hexadecimal code point value */
+        count=range->variant;
+
+        /*
+         * NOTE(review): the indexing of buffer[] from here on assumes that
+         * WRITE_CHAR (defined outside this view) advanced buffer past the
+         * prefix and reduced bufferLength accordingly - confirm.
+         */
+        /* zero-terminate */
+        if(count<bufferLength) {
+            buffer[count]=0;
+        }
+
+        /* fill in the digits from the least significant one backwards */
+        for(i=count; i>0;) {
+            if(--i<bufferLength) {
+                c=(char)(code&0xf);
+                if(c<10) {
+                    c+='0';
+                } else {
+                    c+='A'-10;
+                }
+                buffer[i]=c;
+            }
+            code>>=4;
+        }
+
+        bufferPos+=count;
+        break;
+    }
+    case 1: {
+        /* name = prefix factorized-elements */
+        uint16_t indexes[8];
+        const uint16_t *factors=(const uint16_t *)(range+1);
+        uint16_t count=range->variant;
+        /* the prefix string follows the count factors */
+        const char *s=(const char *)(factors+count);
+        char c;
+
+        /* copy prefix */
+        while((c=*s++)!=0) {
+            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+        }
+
+        /* s now points to the element strings; the suffix is selected by the offset from the range start */
+        bufferPos+=writeFactorSuffix(factors, count,
+                                     s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
+        break;
+    }
+    default:
+        /* undefined type */
+        /* zero-terminate */
+        if(bufferLength>0) {
+            *buffer=0;
+        }
+        break;
+    }
+
+    return bufferPos;
+}
+
+/*
+ * Enumerate the algorithmic names of the code points in [start, limit)
+ * according to range, calling fn for each name. Names are built in a
+ * local 200-byte buffer; the name of start is generated in full and each
+ * following name is derived from its predecessor.
+ * This function does not itself check that the code points lie within
+ * the range.
+ *
+ * Returns TRUE without enumerating for name choices other than
+ * U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME and for undefined range
+ * types. Returns FALSE as soon as fn returns FALSE.
+ *
+ * Important: enumAlgNames() and findAlgName() are almost the same.
+ * Any fix must be applied to both.
+ */
+static UBool
+enumAlgNames(AlgorithmicRange *range,
+             UChar32 start, UChar32 limit,
+             UEnumCharNamesFn *fn, void *context,
+             UCharNameChoice nameChoice) {
+    char buffer[200];
+    uint16_t length;
+
+    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+        return TRUE;
+    }
+
+    switch(range->type) {
+    case 0: {
+        char *s, *end;
+        char c;
+
+        /* get the full name of the start character */
+        length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
+        /* length is unsigned, so this tests for an empty name */
+        if(length<=0) {
+            return TRUE;
+        }
+
+        /* call the enumerator function with this first character */
+        if(!fn(context, start, nameChoice, buffer, length)) {
+            return FALSE;
+        }
+
+        /* go to the end of the name; all these names have the same length */
+        end=buffer;
+        while(*end!=0) {
+            ++end;
+        }
+
+        /* enumerate the rest of the names */
+        while(++start<limit) {
+            /* increment the hexadecimal number on a character-basis */
+            s=end;
+            for (;;) {
+                c=*--s;
+                if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
+                    *s=(char)(c+1);
+                    break;
+                } else if(c=='9') {
+                    *s='A';
+                    break;
+                } else if(c=='F') {
+                    /* wrap this digit and carry into the next more significant one */
+                    *s='0';
+                }
+            }
+
+            if(!fn(context, start, nameChoice, buffer, length)) {
+                return FALSE;
+            }
+        }
+        break;
+    }
+    case 1: {
+        uint16_t indexes[8];
+        const char *elementBases[8], *elements[8];
+        const uint16_t *factors=(const uint16_t *)(range+1);
+        uint16_t count=range->variant;
+        const char *s=(const char *)(factors+count);
+        char *suffix, *t;
+        uint16_t prefixLength, i, idx;
+
+        char c;
+
+        /* name = prefix factorized-elements */
+
+        /* copy prefix */
+        /* NOTE(review): the prefix is copied without a bounds check against sizeof(buffer) */
+        suffix=buffer;
+        prefixLength=0;
+        while((c=*s++)!=0) {
+            *suffix++=c;
+            ++prefixLength;
+        }
+
+        /* append the suffix of the start character */
+        length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
+                                              s, (uint32_t)start-range->start,
+                                              indexes, elementBases, elements,
+                                              suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
+
+        /* call the enumerator function with this first character */
+        if(!fn(context, start, nameChoice, buffer, length)) {
+            return FALSE;
+        }
+
+        /* enumerate the rest of the names */
+        while(++start<limit) {
+            /* increment the indexes in lexical order bound by the factors */
+            /* start with the last factor and carry towards the first one */
+            i=count;
+            for (;;) {
+                idx=(uint16_t)(indexes[--i]+1);
+                if(idx<factors[i]) {
+                    /* skip one index and its element string */
+                    indexes[i]=idx;
+                    s=elements[i];
+                    while(*s++!=0) {
+                    }
+                    elements[i]=s;
+                    break;
+                } else {
+                    /* reset this index to 0 and its element string to the first one */
+                    indexes[i]=0;
+                    elements[i]=elementBases[i];
+                }
+            }
+
+            /* to make matters a little easier, just append all elements to the suffix */
+            /* NOTE(review): unlike the first name, this rewrite is not bounds-checked against buffer */
+            t=suffix;
+            length=prefixLength;
+            for(i=0; i<count; ++i) {
+                s=elements[i];
+                while((c=*s++)!=0) {
+                    *t++=c;
+                    ++length;
+                }
+            }
+            /* zero-terminate */
+            *t=0;
+
+            if(!fn(context, start, nameChoice, buffer, length)) {
+                return FALSE;
+            }
+        }
+        break;
+    }
+    default:
+        /* undefined type */
+        break;
+    }
+
+    return TRUE;
+}
+
+/*
+ * findAlgName() is almost the same as enumAlgNames() except that it
+ * returns the code point for a name if it fits into the range.
+ * It returns 0xffff otherwise.
+ *
+ * otherName is the NUL-terminated name to look up. It is compared
+ * character by character as is; for type 0 ranges only the digits
+ * '0'-'9' and 'A'-'F' are accepted in the hexadecimal part.
+ * For type 1 ranges, the suffixes of all code points of the range are
+ * generated one after another and compared with the rest of otherName.
+ * 0xffff is also returned for name choices other than
+ * U_UNICODE_CHAR_NAME and U_EXTENDED_CHAR_NAME and for undefined range types.
+ */
+static UChar32
+findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
+    UChar32 code;
+
+    if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
+        return 0xffff;
+    }
+
+    switch(range->type) {
+    case 0: {
+        /* name = prefix hex-digits */
+        const char *s=(const char *)(range+1);
+        char c;
+
+        uint16_t i, count;
+
+        /* compare prefix */
+        while((c=*s++)!=0) {
+            if((char)c!=*otherName++) {
+                return 0xffff;
+            }
+        }
+
+        /* read hexadecimal code point value */
+        /* exactly range->variant digits are expected */
+        count=range->variant;
+        code=0;
+        for(i=0; i<count; ++i) {
+            c=*otherName++;
+            if('0'<=c && c<='9') {
+                code=(code<<4)|(c-'0');
+            } else if('A'<=c && c<='F') {
+                code=(code<<4)|(c-'A'+10);
+            } else {
+                return 0xffff;
+            }
+        }
+
+        /* does it fit into the range? */
+        /* the name must also end right after the digits */
+        if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
+            return code;
+        }
+        break;
+    }
+    case 1: {
+        char buffer[64];
+        uint16_t indexes[8];
+        const char *elementBases[8], *elements[8];
+        const uint16_t *factors=(const uint16_t *)(range+1);
+        uint16_t count=range->variant;
+        const char *s=(const char *)(factors+count), *t;
+        UChar32 start, limit;
+        uint16_t i, idx;
+
+        char c;
+
+        /* name = prefix factorized-elements */
+
+        /* compare prefix */
+        while((c=*s++)!=0) {
+            if((char)c!=*otherName++) {
+                return 0xffff;
+            }
+        }
+
+        /* otherName now points to the suffix part of the name */
+        start=(UChar32)range->start;
+        limit=(UChar32)(range->end+1);
+
+        /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
+        writeFactorSuffix(factors, count, s, 0,
+                          indexes, elementBases, elements, buffer, sizeof(buffer));
+
+        /* compare the first suffix */
+        if(0==uprv_strcmp(otherName, buffer)) {
+            return start;
+        }
+
+        /* enumerate and compare the rest of the suffixes */
+        while(++start<limit) {
+            /* increment the indexes in lexical order bound by the factors */
+            i=count;
+            for (;;) {
+                idx=(uint16_t)(indexes[--i]+1);
+                if(idx<factors[i]) {
+                    /* skip one index and its element string */
+                    indexes[i]=idx;
+                    s=elements[i];
+                    while(*s++!=0) {}
+                    elements[i]=s;
+                    break;
+                } else {
+                    /* reset this index to 0 and its element string to the first one */
+                    indexes[i]=0;
+                    elements[i]=elementBases[i];
+                }
+            }
+
+            /* to make matters a little easier, just compare all elements of the suffix */
+            t=otherName;
+            for(i=0; i<count; ++i) {
+                s=elements[i];
+                while((c=*s++)!=0) {
+                    if(c!=*t++) {
+                        /*
+                         * pointing s at an empty string ends the inner loop on
+                         * its next read; i=99 ends the outer loop and marks the
+                         * mismatch for the i<99 test below
+                         */
+                        s=""; /* does not match */
+                        i=99;
+                    }
+                }
+            }
+            /* a match requires that all elements matched and that otherName ends here */
+            if(i<99 && *t==0) {
+                return start;
+            }
+        }
+        break;
+    }
+    default:
+        /* undefined type */
+        break;
+    }
+
+    return 0xffff;
+}
+
+/* sets of name characters, maximum name lengths ---------------------------- */
+
+/*
+ * A name character set is a bit set of 256 bits stored as uint32_t[8]:
+ * the byte value b is represented by bit (b&0x1f) of word (b>>5).
+ * SET_ADD sets the bit for c, SET_CONTAINS tests it.
+ * c is converted to uint8_t first; it is parenthesized so that an
+ * expression argument (e.g. base+1) is converted as a whole.
+ * Note that c is evaluated twice.
+ */
+#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
+#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
+
+/*
+ * Add every character of the NUL-terminated string s to set and
+ * return the number of characters in s, not counting the terminator.
+ */
+static int32_t
+calcStringSetLength(uint32_t set[8], const char *s) {
+    int32_t count=0;
+
+    for(; *s!=0; ++s) {
+        char ch=*s;
+        SET_ADD(set, ch);
+        ++count;
+    }
+    return count;
+}
+
+/*
+ * Walk all algorithmic-name ranges of uCharNames, add the characters of
+ * their prefixes and factor suffixes to gNameSet, and return the larger of
+ * maxNameLength and the longest algorithmic name length found.
+ */
+static int32_t
+calcAlgNameSetsLengths(int32_t maxNameLength) {
+    /* the algorithmic names start with the number of ranges, followed by the ranges */
+    uint32_t *header=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    AlgorithmicRange *range=(AlgorithmicRange *)(header+1);
+    uint32_t remaining;
+
+    /* enumerate algorithmic ranges */
+    for(remaining=*header; remaining>0; --remaining) {
+        if(range->type==0) {
+            /* name = prefix + (range->variant times) hex-digits */
+            int32_t nameLength=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
+            if(maxNameLength<nameLength) {
+                maxNameLength=nameLength;
+            }
+        } else if(range->type==1) {
+            /* name = prefix factorized-elements */
+            const uint16_t *factors=(const uint16_t *)(range+1);
+            int32_t factorCount=range->variant;
+            /* the prefix string follows the factors */
+            const char *text=(const char *)(factors+factorCount);
+            int32_t nameLength=calcStringSetLength(gNameSet, text);
+            int32_t f;
+
+            text+=nameLength+1; /* start of factor suffixes */
+
+            /* get the set and maximum factor suffix length for each factor */
+            for(f=0; f<factorCount; ++f) {
+                int32_t longest=0;
+                int32_t left;
+
+                for(left=factors[f]; left>0; --left) {
+                    int32_t suffixLength=calcStringSetLength(gNameSet, text);
+                    text+=suffixLength+1;
+                    if(longest<suffixLength) {
+                        longest=suffixLength;
+                    }
+                }
+                nameLength+=longest;
+            }
+
+            if(maxNameLength<nameLength) {
+                maxNameLength=nameLength;
+            }
+        }
+        /* ranges of an unknown type are skipped */
+
+        range=(AlgorithmicRange *)((uint8_t *)range+range->size);
+    }
+    return maxNameLength;
+}
+
+/*
+ * Add the characters of all category names in charCatNames to gNameSet and
+ * return the larger of maxNameLength and the longest extended name length.
+ */
+static int32_t
+calcExtNameSetsLengths(int32_t maxNameLength) {
+    /*
+     * an extended name is the category name plus 9 more characters:
+     * 2 for <>
+     * 1 for -
+     * 6 for most hex digits per code point
+     */
+    const int32_t extraLength=9;
+    int32_t longest=maxNameLength;
+    int32_t cat;
+
+    for(cat=0; cat<UPRV_LENGTHOF(charCatNames); ++cat) {
+        int32_t extLength=extraLength+calcStringSetLength(gNameSet, charCatNames[cat]);
+        longest=(extLength>longest) ? extLength : longest;
+    }
+    return longest;
+}
+
+/*
+ * Measure one name field of a compressed group line and add its
+ * characters to set.
+ *
+ * Reading starts at *pLine and stops at lineLimit or after a ';'.
+ * Each byte is either a letter or an index into tokens; token words are
+ * taken from tokenStrings. tokenLengths, if not NULL, caches the lengths
+ * of token words (0 means not calculated yet).
+ *
+ * On return, *pLine points behind the bytes that were read.
+ * Returns the length of the expanded name.
+ */
+static int32_t
+calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
+                  uint32_t set[8],
+                  const uint8_t **pLine, const uint8_t *lineLimit) {
+    const uint8_t *cursor=*pLine;
+    int32_t total=0;
+
+    while(cursor!=lineLimit) {
+        uint16_t c=*cursor++;
+        uint16_t token;
+        const char *word;
+        int32_t wordLength;
+
+        if(c==(uint8_t)';') {
+            /* end of this field; the separator has been consumed */
+            break;
+        }
+
+        if(c>=tokenCount) {
+            /* implicit letter */
+            SET_ADD(set, c);
+            ++total;
+            continue;
+        }
+
+        token=tokens[c];
+        if(token==(uint16_t)(-2)) {
+            /* this is a lead byte for a double-byte token */
+            c=c<<8|*cursor++;
+            token=tokens[c];
+        }
+        if(token==(uint16_t)(-1)) {
+            /* explicit letter */
+            SET_ADD(set, c);
+            ++total;
+            continue;
+        }
+
+        /* count token word */
+        word=(const char *)tokenStrings+token;
+        if(tokenLengths==NULL) {
+            wordLength=calcStringSetLength(set, word);
+        } else {
+            /* use cached token length */
+            wordLength=tokenLengths[c];
+            if(wordLength==0) {
+                wordLength=calcStringSetLength(set, word);
+                tokenLengths[c]=(int8_t)wordLength;
+            }
+        }
+        total+=wordLength;
+    }
+
+    *pLine=cursor;
+    return total;
+}
+
+/*
+ * Go through the names of all lines of all groups in uCharNames, add their
+ * characters to gNameSet, and store the larger of maxNameLength and the
+ * longest name length found in gMaxNameLength.
+ */
+static void
+calcGroupNameSetsLengths(int32_t maxNameLength) {
+    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
+
+    /* the token table starts with the number of tokens */
+    uint16_t *tokens=(uint16_t *)uCharNames+8;
+    uint16_t tokenCount=*tokens++;
+    uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
+
+    /* cache for token lengths; calcNameSetLength() works without it if the allocation fails */
+    int8_t *tokenLengths=(int8_t *)uprv_malloc(tokenCount);
+    if(tokenLengths!=NULL) {
+        uprv_memset(tokenLengths, 0, tokenCount);
+    }
+
+    const uint16_t *group=GET_GROUPS(uCharNames);
+    int32_t groupsLeft;
+
+    /* enumerate all groups */
+    for(groupsLeft=*group++; groupsLeft>0; --groupsLeft) {
+        const uint8_t *groupText=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
+        int32_t lineNumber;
+
+        groupText=expandGroupLengths(groupText, offsets, lengths);
+
+        /* enumerate all lines in each group */
+        for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
+            const uint8_t *line, *lineLimit;
+            int32_t field;
+
+            if(lengths[lineNumber]==0) {
+                continue;
+            }
+            line=groupText+offsets[lineNumber];
+            lineLimit=line+lengths[lineNumber];
+
+            /*
+             * field 0 is the regular name, field 1 is the Unicode 1.0 name;
+             * an ISO comment that may follow is not read
+             */
+            for(field=0; field<2; ++field) {
+                int32_t nameLength=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
+                if(maxNameLength<nameLength) {
+                    maxNameLength=nameLength;
+                }
+                if(line==lineLimit) {
+                    break;
+                }
+            }
+        }
+
+        group=NEXT_GROUP(group);
+    }
+
+    if(tokenLengths!=NULL) {
+        uprv_free(tokenLengths);
+    }
+
+    /* set gMax... - name length last for threading */
+    gMaxNameLength=maxNameLength;
+}
+
+/*
+ * Make sure that gNameSet and gMaxNameLength have been calculated.
+ * Returns TRUE if they already were or have been calculated now,
+ * FALSE if isDataLoaded() fails.
+ */
+static UBool
+calcNameSetsLengths(UErrorCode *pErrorCode) {
+    static const char extChars[]="0123456789ABCDEF<>-";
+    const char *p;
+    int32_t longest;
+
+    /* a non-zero maximum means that the work has been done before */
+    if(gMaxNameLength!=0) {
+        return TRUE;
+    }
+    if(!isDataLoaded(pErrorCode)) {
+        return FALSE;
+    }
+
+    /* set hex digits, used in various names, and <>-, used in extended names */
+    for(p=extChars; *p!=0; ++p) {
+        char ch=*p;
+        SET_ADD(gNameSet, ch);
+    }
+
+    /* algorithmic names first, then extended names */
+    longest=calcAlgNameSetsLengths(0);
+    longest=calcExtNameSetsLengths(longest);
+
+    /* group names last; this also sets the global maximum name length */
+    calcGroupNameSetsLengths(longest);
+
+    return TRUE;
+}
+
+U_NAMESPACE_END
+
+/* public API --------------------------------------------------------------- */
+
+U_NAMESPACE_USE
+/**
+ * Writes the name of a code point into a caller-supplied char buffer.
+ *
+ * The algorithmic ranges found at uCharNames->algNamesOffset are searched
+ * first. If no range contains the code point, getName() is used; for
+ * U_EXTENDED_CHAR_NAME, getExtName() is the fallback when getName() returns 0.
+ *
+ * @param code         code point; anything outside 0..UCHAR_MAX_VALUE produces
+ *                     a zero-length result
+ * @param nameChoice   selects the kind of name; must be below
+ *                     U_CHAR_NAME_CHOICE_COUNT
+ * @param buffer       destination; may be NULL only if bufferLength is 0
+ * @param bufferLength capacity of buffer; must not be negative
+ * @param pErrorCode   in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
+ *                     invalid arguments
+ * @return 0 if pErrorCode is NULL or already a failure, or if an argument is
+ *         invalid; otherwise the result of u_terminateChars() for the name
+ *         length
+ */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+           char *buffer, int32_t bufferLength,
+           UErrorCode *pErrorCode) {
+     AlgorithmicRange *algRange;
+    uint32_t *p;
+    uint32_t i;
+    int32_t length;
+
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
+              bufferLength<0 || (bufferLength>0 && buffer==NULL)
+    ) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) { /* the unsigned cast also rejects negative code points */
+        return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
+    }
+
+    length=0;
+
+    /* try algorithmic names first */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p; /* first 32-bit word: number of algorithmic ranges that follow */
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
+            /* NOTE(review): bufferLength is narrowed to uint16_t here and in the calls below, so capacities above 0xffff are truncated - confirm the helpers and callers tolerate that */
+            length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
+            break;
+        }
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); /* size is the byte distance to the next range */
+        --i;
+    }
+
+    if(i==0) { /* no algorithmic range matched; a match leaves the loop with i>0 */
+        if (nameChoice == U_EXTENDED_CHAR_NAME) {
+            length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
+            if (!length) {
+                /* extended character name */
+                length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
+            }
+        } else {
+            /* normal character name */
+            length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
+        }
+    }
+
+    return u_terminateChars(buffer, bufferLength, length, pErrorCode);
+}
+/**
+ * Always produces an empty (zero-length) result: after validating the
+ * arguments it only calls u_terminateChars() with a length of 0. The code
+ * point parameter is unnamed and not used.
+ *
+ * @param dest         destination; may be NULL only if destCapacity is 0
+ * @param destCapacity capacity of dest; must not be negative
+ * @param pErrorCode   in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
+ *                     invalid arguments
+ * @return 0 if pErrorCode is NULL or already a failure, or if an argument is
+ *         invalid; otherwise the result of u_terminateChars() for length 0
+ */
+U_CAPI int32_t U_EXPORT2
+u_getISOComment(UChar32 /*c*/,
+                char *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    return u_terminateChars(dest, destCapacity, 0, pErrorCode);
+}
+
+U_CAPI UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+               const char *name,
+               UErrorCode *pErrorCode) {
     char upper[120] = {0};
     char lower[120] = {0};
-    FindName findName; 
-    AlgorithmicRange *algRange; 
-    uint32_t *p; 
-    uint32_t i; 
-    UChar32 cp = 0; 
-    char c0; 
+    FindName findName;
+    AlgorithmicRange *algRange;
+    uint32_t *p;
+    uint32_t i;
+    UChar32 cp = 0;
+    char c0;
     static constexpr UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
- 
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 
-        return error; 
-    } 
- 
-    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-        return error; 
-    } 
- 
-    if(!isDataLoaded(pErrorCode)) { 
-        return error; 
-    } 
- 
-    /* construct the uppercase and lowercase of the name first */ 
-    for(i=0; i<sizeof(upper); ++i) { 
-        if((c0=*name++)!=0) { 
-            upper[i]=uprv_toupper(c0); 
-            lower[i]=uprv_tolower(c0); 
-        } else { 
-            upper[i]=lower[i]=0; 
-            break; 
-        } 
-    } 
-    if(i==sizeof(upper)) { 
-        /* name too long, there is no such character */ 
-        *pErrorCode = U_ILLEGAL_CHAR_FOUND; 
-        return error; 
-    } 
-    // i==strlen(name)==strlen(lower)==strlen(upper) 
- 
-    /* try extended names first */ 
-    if (lower[0] == '<') { 
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return error;
+    }
+
+    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return error;
+    }
+
+    if(!isDataLoaded(pErrorCode)) {
+        return error;
+    }
+
+    /* construct the uppercase and lowercase of the name first */
+    for(i=0; i<sizeof(upper); ++i) {
+        if((c0=*name++)!=0) {
+            upper[i]=uprv_toupper(c0);
+            lower[i]=uprv_tolower(c0);
+        } else {
+            upper[i]=lower[i]=0;
+            break;
+        }
+    }
+    if(i==sizeof(upper)) {
+        /* name too long, there is no such character */
+        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+        return error;
+    }
+    // i==strlen(name)==strlen(lower)==strlen(upper)
+
+    /* try extended names first */
+    if (lower[0] == '<') {
         if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
-            // Parse a string like "<category-HHHH>" where HHHH is a hex code point. 
+            // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
             uint32_t limit = i;
             while (i >= 3 && lower[--i] != '-') {}
- 
+
             // There should be 1 to 8 hex digits.
             int32_t hexLength = limit - (i + 1);
             if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
                 uint32_t cIdx;
- 
+
                 lower[i] = 0;
- 
+
                 for (++i; i < limit; ++i) {
                     if (lower[i] >= '0' && lower[i] <= '9') {
                         cp = (cp << 4) + lower[i] - '0';
@@ -1581,528 +1581,528 @@ u_charFromName(UCharNameChoice nameChoice,
                     } else {
                         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
                         return error;
-                    } 
+                    }
                     // Prevent signed-integer overflow and out-of-range code points.
                     if (cp > UCHAR_MAX_VALUE) {
                         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
                         return error;
                     }
                 }
- 
+
                 /* Now validate the category name.
                    We could use a binary search, or a trie, if
                    we really wanted to. */
                 uint8_t cat = getCharCat(cp);
                 for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
- 
+
                     if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
                         if (cat == cIdx) {
                             return cp;
-                        } 
+                        }
                         break;
-                    } 
-                } 
-            } 
-        } 
- 
-        *pErrorCode = U_ILLEGAL_CHAR_FOUND; 
-        return error; 
-    } 
- 
-    /* try algorithmic names now */ 
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 
-    i=*p; 
-    algRange=(AlgorithmicRange *)(p+1); 
-    while(i>0) { 
-        if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) { 
-            return cp; 
-        } 
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 
-        --i; 
-    } 
- 
-    /* normal character name */ 
-    findName.otherName=upper; 
-    findName.code=error; 
-    enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice); 
-    if (findName.code == error) { 
-         *pErrorCode = U_ILLEGAL_CHAR_FOUND; 
-    } 
-    return findName.code; 
-} 
- 
-U_CAPI void U_EXPORT2 
-u_enumCharNames(UChar32 start, UChar32 limit, 
-                UEnumCharNamesFn *fn, 
-                void *context, 
-                UCharNameChoice nameChoice, 
-                UErrorCode *pErrorCode) { 
-    AlgorithmicRange *algRange; 
-    uint32_t *p; 
-    uint32_t i; 
- 
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 
-        return; 
-    } 
- 
-    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) { 
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 
-        return; 
-    } 
- 
-    if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { 
-        limit = UCHAR_MAX_VALUE + 1; 
-    } 
-    if((uint32_t)start>=(uint32_t)limit) { 
-        return; 
-    } 
- 
-    if(!isDataLoaded(pErrorCode)) { 
-        return; 
-    } 
- 
-    /* interleave the data-driven ones with the algorithmic ones */ 
-    /* iterate over all algorithmic ranges; assume that they are in ascending order */ 
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset); 
-    i=*p; 
-    algRange=(AlgorithmicRange *)(p+1); 
-    while(i>0) { 
-        /* enumerate the character names before the current algorithmic range */ 
-        /* here: start<limit */ 
-        if((uint32_t)start<algRange->start) { 
-            if((uint32_t)limit<=algRange->start) { 
-                enumNames(uCharNames, start, limit, fn, context, nameChoice); 
-                return; 
-            } 
-            if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) { 
-                return; 
-            } 
-            start=(UChar32)algRange->start; 
-        } 
-        /* enumerate the character names in the current algorithmic range */ 
-        /* here: algRange->start<=start<limit */ 
-        if((uint32_t)start<=algRange->end) { 
-            if((uint32_t)limit<=(algRange->end+1)) { 
-                enumAlgNames(algRange, start, limit, fn, context, nameChoice); 
-                return; 
-            } 
-            if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) { 
-                return; 
-            } 
-            start=(UChar32)algRange->end+1; 
-        } 
-        /* continue to the next algorithmic range (here: start<limit) */ 
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); 
-        --i; 
-    } 
-    /* enumerate the character names after the last algorithmic range */ 
-    enumNames(uCharNames, start, limit, fn, context, nameChoice); 
-} 
- 
-U_CAPI int32_t U_EXPORT2 
-uprv_getMaxCharNameLength() { 
-    UErrorCode errorCode=U_ZERO_ERROR; 
-    if(calcNameSetsLengths(&errorCode)) { 
-        return gMaxNameLength; 
-    } else { 
-        return 0; 
-    } 
-} 
- 
-/** 
- * Converts the char set cset into a Unicode set uset. 
- * @param cset Set of 256 bit flags corresponding to a set of chars. 
- * @param uset USet to receive characters. Existing contents are deleted. 
- */ 
-static void 
-charSetToUSet(uint32_t cset[8], const USetAdder *sa) { 
-    UChar us[256]; 
-    char cs[256]; 
- 
-    int32_t i, length; 
-    UErrorCode errorCode; 
- 
-    errorCode=U_ZERO_ERROR; 
- 
-    if(!calcNameSetsLengths(&errorCode)) { 
-        return; 
-    } 
- 
-    /* build a char string with all chars that are used in character names */ 
-    length=0; 
-    for(i=0; i<256; ++i) { 
-        if(SET_CONTAINS(cset, i)) { 
-            cs[length++]=(char)i; 
-        } 
-    } 
- 
-    /* convert the char string to a UChar string */ 
-    u_charsToUChars(cs, us, length); 
- 
-    /* add each UChar to the USet */ 
-    for(i=0; i<length; ++i) { 
-        if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */ 
-            sa->add(sa->set, us[i]); 
-        } 
-    } 
-} 
- 
-/** 
- * Fills set with characters that are used in Unicode character names. 
- * @param set USet to receive characters. 
- */ 
-U_CAPI void U_EXPORT2 
-uprv_getCharNameCharacters(const USetAdder *sa) { 
-    charSetToUSet(gNameSet, sa); 
-} 
- 
-/* data swapping ------------------------------------------------------------ */ 
- 
-/* 
- * The token table contains non-negative entries for token bytes, 
- * and -1 for bytes that represent themselves in the data file's charset. 
- * -2 entries are used for lead bytes. 
- * 
- * Direct bytes (-1 entries) must be translated from the input charset family 
- * to the output charset family. 
- * makeTokenMap() writes a permutation mapping for this. 
- * Use it once for single-/lead-byte tokens and once more for all trail byte 
- * tokens. (';' is an unused trail byte marked with -1.) 
- */ 
-static void 
-makeTokenMap(const UDataSwapper *ds, 
-             int16_t tokens[], uint16_t tokenCount, 
-             uint8_t map[256], 
-             UErrorCode *pErrorCode) { 
-    UBool usedOutChar[256]; 
-    uint16_t i, j; 
-    uint8_t c1, c2; 
- 
-    if(U_FAILURE(*pErrorCode)) { 
-        return; 
-    } 
- 
-    if(ds->inCharset==ds->outCharset) { 
-        /* Same charset family: identity permutation */ 
-        for(i=0; i<256; ++i) { 
-            map[i]=(uint8_t)i; 
-        } 
-    } else { 
-        uprv_memset(map, 0, 256); 
-        uprv_memset(usedOutChar, 0, 256); 
- 
-        if(tokenCount>256) { 
-            tokenCount=256; 
-        } 
- 
-        /* set the direct bytes (byte 0 always maps to itself) */ 
-        for(i=1; i<tokenCount; ++i) { 
-            if(tokens[i]==-1) { 
-                /* convert the direct byte character */ 
-                c1=(uint8_t)i; 
-                ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode); 
-                if(U_FAILURE(*pErrorCode)) { 
-                    udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n", 
-                                     i, ds->inCharset); 
-                    return; 
-                } 
- 
-                /* enter the converted character into the map and mark it used */ 
-                map[c1]=c2; 
-                usedOutChar[c2]=TRUE; 
-            } 
-        } 
- 
-        /* set the mappings for the rest of the permutation */ 
-        for(i=j=1; i<tokenCount; ++i) { 
-            /* set mappings that were not set for direct bytes */ 
-            if(map[i]==0) { 
-                /* set an output byte value that was not used as an output byte above */ 
-                while(usedOutChar[j]) { 
-                    ++j; 
-                } 
-                map[i]=(uint8_t)j++; 
-            } 
-        } 
- 
-        /* 
-         * leave mappings at tokenCount and above unset if tokenCount<256 
-         * because they won't be used 
-         */ 
-    } 
-} 
- 
-U_CAPI int32_t U_EXPORT2 
-uchar_swapNames(const UDataSwapper *ds, 
-                const void *inData, int32_t length, void *outData, 
-                UErrorCode *pErrorCode) { 
-    const UDataInfo *pInfo; 
-    int32_t headerSize; 
- 
-    const uint8_t *inBytes; 
-    uint8_t *outBytes; 
- 
-    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset, 
-             offset, i, count, stringsCount; 
- 
-    const AlgorithmicRange *inRange; 
-    AlgorithmicRange *outRange; 
- 
-    /* udata_swapDataHeader checks the arguments */ 
-    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 
-        return 0; 
-    } 
- 
-    /* check data format and format version */ 
-    pInfo=(const UDataInfo *)((const char *)inData+4); 
-    if(!( 
-        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */ 
-        pInfo->dataFormat[1]==0x6e && 
-        pInfo->dataFormat[2]==0x61 && 
-        pInfo->dataFormat[3]==0x6d && 
-        pInfo->formatVersion[0]==1 
-    )) { 
-        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n", 
-                         pInfo->dataFormat[0], pInfo->dataFormat[1], 
-                         pInfo->dataFormat[2], pInfo->dataFormat[3], 
-                         pInfo->formatVersion[0]); 
-        *pErrorCode=U_UNSUPPORTED_ERROR; 
-        return 0; 
-    } 
- 
-    inBytes=(const uint8_t *)inData+headerSize; 
-    outBytes=(uint8_t *)outData+headerSize; 
-    if(length<0) { 
-        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); 
-    } else { 
-        length-=headerSize; 
-        if( length<20 || 
-            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3])) 
-        ) { 
-            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n", 
-                             length); 
-            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 
-            return 0; 
-        } 
-    } 
- 
-    if(length<0) { 
-        /* preflighting: iterate through algorithmic ranges */ 
-        offset=algNamesOffset; 
-        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 
-        offset+=4; 
- 
-        for(i=0; i<count; ++i) { 
-            inRange=(const AlgorithmicRange *)(inBytes+offset); 
-            offset+=ds->readUInt16(inRange->size); 
-        } 
-    } else { 
-        /* swap data */ 
-        const uint16_t *p; 
-        uint16_t *q, *temp; 
- 
-        int16_t tokens[512]; 
-        uint16_t tokenCount; 
- 
-        uint8_t map[256], trailMap[256]; 
- 
-        /* copy the data for inaccessible bytes */ 
-        if(inBytes!=outBytes) { 
-            uprv_memcpy(outBytes, inBytes, length); 
-        } 
- 
-        /* the initial 4 offsets first */ 
-        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]); 
-        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]); 
-        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]); 
-        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode); 
- 
-        /* 
-         * now the tokens table 
-         * it needs to be permutated along with the compressed name strings 
-         */ 
-        p=(const uint16_t *)(inBytes+16); 
-        q=(uint16_t *)(outBytes+16); 
- 
-        /* read and swap the tokenCount */ 
-        tokenCount=ds->readUInt16(*p); 
-        ds->swapArray16(ds, p, 2, q, pErrorCode); 
-        ++p; 
-        ++q; 
- 
-        /* read the first 512 tokens and make the token maps */ 
-        if(tokenCount<=512) { 
-            count=tokenCount; 
-        } else { 
-            count=512; 
-        } 
-        for(i=0; i<count; ++i) { 
-            tokens[i]=udata_readInt16(ds, p[i]); 
-        } 
-        for(; i<512; ++i) { 
-            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */ 
-        } 
-        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode); 
-        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode); 
-        if(U_FAILURE(*pErrorCode)) { 
-            return 0; 
-        } 
- 
-        /* 
-         * swap and permutate the tokens 
-         * go through a temporary array to support in-place swapping 
-         */ 
-        temp=(uint16_t *)uprv_malloc(tokenCount*2); 
-        if(temp==NULL) { 
-            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n", 
-                             tokenCount); 
-            *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 
-            return 0; 
-        } 
- 
-        /* swap and permutate single-/lead-byte tokens */ 
-        for(i=0; i<tokenCount && i<256; ++i) { 
-            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode); 
-        } 
- 
-        /* swap and permutate trail-byte tokens */ 
-        for(; i<tokenCount; ++i) { 
-            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode); 
-        } 
- 
-        /* copy the result into the output and free the temporary array */ 
-        uprv_memcpy(q, temp, tokenCount*2); 
-        uprv_free(temp); 
- 
-        /* 
-         * swap the token strings but not a possible padding byte after 
-         * the terminating NUL of the last string 
-         */ 
-        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset), 
-                                    outBytes+tokenStringOffset, pErrorCode); 
-        if(U_FAILURE(*pErrorCode)) { 
-            udata_printError(ds, "uchar_swapNames(token strings) failed\n"); 
-            return 0; 
-        } 
- 
-        /* swap the group table */ 
-        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset))); 
-        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2), 
-                           outBytes+groupsOffset, pErrorCode); 
- 
-        /* 
-         * swap the group strings 
-         * swap the string bytes but not the nibble-encoded string lengths 
-         */ 
-        if(ds->inCharset!=ds->outCharset) { 
-            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1]; 
- 
-            const uint8_t *inStrings, *nextInStrings; 
-            uint8_t *outStrings; 
- 
-            uint8_t c; 
- 
-            inStrings=inBytes+groupStringOffset; 
-            outStrings=outBytes+groupStringOffset; 
- 
-            stringsCount=algNamesOffset-groupStringOffset; 
- 
-            /* iterate through string groups until only a few padding bytes are left */ 
-            while(stringsCount>32) { 
-                nextInStrings=expandGroupLengths(inStrings, offsets, lengths); 
- 
-                /* move past the length bytes */ 
-                stringsCount-=(uint32_t)(nextInStrings-inStrings); 
-                outStrings+=nextInStrings-inStrings; 
-                inStrings=nextInStrings; 
- 
-                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */ 
-                stringsCount-=count; 
- 
-                /* swap the string bytes using map[] and trailMap[] */ 
-                while(count>0) { 
-                    c=*inStrings++; 
-                    *outStrings++=map[c]; 
-                    if(tokens[c]!=-2) { 
-                        --count; 
-                    } else { 
-                        /* token lead byte: swap the trail byte, too */ 
-                        *outStrings++=trailMap[*inStrings++]; 
-                        count-=2; 
-                    } 
-                } 
-            } 
-        } 
- 
-        /* swap the algorithmic ranges */ 
-        offset=algNamesOffset; 
-        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset))); 
-        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode); 
-        offset+=4; 
- 
-        for(i=0; i<count; ++i) { 
-            if(offset>(uint32_t)length) { 
-                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n", 
-                                 length, i); 
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 
-                return 0; 
-            } 
- 
-            inRange=(const AlgorithmicRange *)(inBytes+offset); 
-            outRange=(AlgorithmicRange *)(outBytes+offset); 
-            offset+=ds->readUInt16(inRange->size); 
- 
-            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); 
-            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode); 
-            switch(inRange->type) { 
-            case 0: 
-                /* swap prefix string */ 
-                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)), 
-                                    outRange+1, pErrorCode); 
-                if(U_FAILURE(*pErrorCode)) { 
-                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n", 
-                                     i); 
-                    return 0; 
-                } 
-                break; 
-            case 1: 
-                { 
-                    /* swap factors and the prefix and factor strings */ 
-                    uint32_t factorsCount; 
- 
-                    factorsCount=inRange->variant; 
-                    p=(const uint16_t *)(inRange+1); 
-                    q=(uint16_t *)(outRange+1); 
-                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode); 
- 
-                    /* swap the strings, up to the last terminating NUL */ 
-                    p+=factorsCount; 
-                    q+=factorsCount; 
-                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); 
-                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) { 
-                        --stringsCount; 
-                    } 
-                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); 
-                } 
-                break; 
-            default: 
-                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n", 
-                                 inRange->type, i); 
-                *pErrorCode=U_UNSUPPORTED_ERROR; 
-                return 0; 
-            } 
-        } 
-    } 
- 
-    return headerSize+(int32_t)offset; 
-} 
- 
-/* 
- * Hey, Emacs, please set the following: 
- * 
- * Local Variables: 
- * indent-tabs-mode: nil 
- * End: 
- * 
- */ 
+                    }
+                }
+            }
+        }
+
+        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+        return error;
+    }
+
+    /* try algorithmic names now */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p;
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
+            return cp;
+        }
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
+        --i;
+    }
+
+    /* normal character name */
+    findName.otherName=upper;
+    findName.code=error;
+    enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
+    if (findName.code == error) {
+         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+    }
+    return findName.code;
+}
+/**
+ * Enumerates character names for the code points in [start, limit).
+ *
+ * The range is walked in ascending order. Stretches that fall inside an
+ * algorithmic range are handed to enumAlgNames(), everything else to
+ * enumNames(). The walk ends early as soon as one of those helpers returns
+ * false for a stretch that is not the last one.
+ *
+ * @param start      first code point of the range
+ * @param limit      one past the last code point; values above
+ *                   UCHAR_MAX_VALUE+1 are clamped to UCHAR_MAX_VALUE+1
+ * @param fn         callback passed on to enumNames()/enumAlgNames(); must not
+ *                   be NULL
+ * @param context    opaque pointer passed on together with fn
+ * @param nameChoice selects the kind of name; must be below
+ *                   U_CHAR_NAME_CHOICE_COUNT
+ * @param pErrorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
+ *                   invalid arguments. Nothing happens if it is NULL or
+ *                   already a failure.
+ */
+U_CAPI void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+                UEnumCharNamesFn *fn,
+                void *context,
+                UCharNameChoice nameChoice,
+                UErrorCode *pErrorCode) {
+    AlgorithmicRange *algRange;
+    uint32_t *p;
+    uint32_t i;
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { /* unsigned compare: a negative limit is clamped as well */
+        limit = UCHAR_MAX_VALUE + 1;
+    }
+    if((uint32_t)start>=(uint32_t)limit) { /* empty range; a negative start also ends up here */
+        return;
+    }
+
+    if(!isDataLoaded(pErrorCode)) {
+        return;
+    }
+
+    /* interleave the data-driven ones with the algorithmic ones */
+    /* iterate over all algorithmic ranges; assume that they are in ascending order */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p; /* first 32-bit word: number of algorithmic ranges that follow */
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        /* enumerate the character names before the current algorithmic range */
+        /* here: start<limit */
+        if((uint32_t)start<algRange->start) {
+            if((uint32_t)limit<=algRange->start) {
+                enumNames(uCharNames, start, limit, fn, context, nameChoice);
+                return;
+            }
+            if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
+                return;
+            }
+            start=(UChar32)algRange->start;
+        }
+        /* enumerate the character names in the current algorithmic range */
+        /* here: algRange->start<=start<limit */
+        if((uint32_t)start<=algRange->end) {
+            if((uint32_t)limit<=(algRange->end+1)) {
+                enumAlgNames(algRange, start, limit, fn, context, nameChoice);
+                return;
+            }
+            if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
+                return;
+            }
+            start=(UChar32)algRange->end+1;
+        }
+        /* continue to the next algorithmic range (here: start<limit) */
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); /* size is the byte distance to the next range */
+        --i;
+    }
+    /* enumerate the character names after the last algorithmic range */
+    enumNames(uCharNames, start, limit, fn, context, nameChoice);
+}
+/**
+ * Returns the maximum character name length.
+ *
+ * Calls calcNameSetsLengths() with a local error code that is not reported
+ * to the caller.
+ *
+ * @return gMaxNameLength if calcNameSetsLengths() succeeds, otherwise 0
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_getMaxCharNameLength() {
+    UErrorCode errorCode=U_ZERO_ERROR;
+    if(calcNameSetsLengths(&errorCode)) {
+        return gMaxNameLength;
+    } else {
+        return 0;
+    }
+}
+
+/**
+ * Adds the characters flagged in the char set cset to a set via a USetAdder.
+ * Returns without adding anything if calcNameSetsLengths() fails.
+ * @param cset Set of 256 bit flags corresponding to a set of chars.
+ * @param sa USetAdder whose add() function is called with sa->set for each
+ *           converted character. This function only adds; it does not remove
+ *           anything itself.
+ */
+static void
+charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
+    UChar us[256];
+    char cs[256];
+
+    int32_t i, length;
+    UErrorCode errorCode;
+
+    errorCode=U_ZERO_ERROR;
+
+    if(!calcNameSetsLengths(&errorCode)) {
+        return;
+    }
+
+    /* build a char string with all chars that are used in character names */
+    length=0;
+    for(i=0; i<256; ++i) {
+        if(SET_CONTAINS(cset, i)) {
+            cs[length++]=(char)i; /* at most 256 entries, one per byte value, so cs cannot overflow */
+        }
+    }
+
+    /* convert the char string to a UChar string */
+    u_charsToUChars(cs, us, length);
+
+    /* add each UChar to the USet */
+    for(i=0; i<length; ++i) {
+        if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
+            sa->add(sa->set, us[i]);
+        }
+    }
+}
+
+/**
+ * Fills a set with characters that are used in Unicode character names.
+ * Delegates to charSetToUSet() with gNameSet.
+ * @param sa USetAdder through which the characters are added.
+ */
+U_CAPI void U_EXPORT2
+uprv_getCharNameCharacters(const USetAdder *sa) {
+    charSetToUSet(gNameSet, sa);
+}
+
+/* data swapping ------------------------------------------------------------ */
+
+/*
+ * The token table contains non-negative entries for token bytes,
+ * and -1 for bytes that represent themselves in the data file's charset.
+ * -2 entries are used for lead bytes.
+ *
+ * Direct bytes (-1 entries) must be translated from the input charset family
+ * to the output charset family.
+ * makeTokenMap() writes a permutation mapping for this.
+ * Use it once for single-/lead-byte tokens and once more for all trail byte
+ * tokens. (';' is an unused trail byte marked with -1.)
+ *
+ * @param ds         swapper providing inCharset, outCharset and swapInvChars()
+ * @param tokens     token table entries for one byte range; at most the first
+ *                   256 entries are examined
+ * @param tokenCount number of entries in tokens; values above 256 are clamped
+ *                   to 256
+ * @param map        output: 256-entry byte mapping. If the charset families
+ *                   are equal it is the identity mapping; otherwise entries
+ *                   at tokenCount and above are left 0.
+ * @param pErrorCode in/out error code; nothing is done if it is already a
+ *                   failure, and a failing swapInvChars() call ends the
+ *                   function early
+ */
+static void
+makeTokenMap(const UDataSwapper *ds,
+             int16_t tokens[], uint16_t tokenCount,
+             uint8_t map[256],
+             UErrorCode *pErrorCode) {
+    UBool usedOutChar[256];
+    uint16_t i, j;
+    uint8_t c1, c2;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(ds->inCharset==ds->outCharset) {
+        /* Same charset family: identity permutation */
+        for(i=0; i<256; ++i) {
+            map[i]=(uint8_t)i;
+        }
+    } else {
+        uprv_memset(map, 0, 256);
+        uprv_memset(usedOutChar, 0, 256); /* NOTE(review): clears 256 bytes, which covers the whole array only if UBool is one byte wide - confirm */
+
+        if(tokenCount>256) {
+            tokenCount=256;
+        }
+
+        /* set the direct bytes (byte 0 always maps to itself) */
+        for(i=1; i<tokenCount; ++i) {
+            if(tokens[i]==-1) {
+                /* convert the direct byte character */
+                c1=(uint8_t)i;
+                ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
+                if(U_FAILURE(*pErrorCode)) {
+                    udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
+                                     i, ds->inCharset);
+                    return;
+                }
+
+                /* enter the converted character into the map and mark it used */
+                map[c1]=c2;
+                usedOutChar[c2]=TRUE;
+            }
+        }
+
+        /* set the mappings for the rest of the permutation */
+        for(i=j=1; i<tokenCount; ++i) { /* j only moves forward, so each output byte is handed out at most once */
+            /* set mappings that were not set for direct bytes */
+            if(map[i]==0) {
+                /* set an output byte value that was not used as an output byte above */
+                while(usedOutChar[j]) {
+                    ++j;
+                }
+                map[i]=(uint8_t)j++;
+            }
+        }
+
+        /*
+         * leave mappings at tokenCount and above unset if tokenCount<256
+         * because they won't be used
+         */
+    }
+}
+
+U_CAPI int32_t U_EXPORT2
+uchar_swapNames(const UDataSwapper *ds,
+                const void *inData, int32_t length, void *outData,
+                UErrorCode *pErrorCode) { /*
+     * Data swapper for unames.icu (data format "unam", format version 1).
+     * 16-/32-bit units are swapped through ds->swapArray16/32 and character
+     * data is converted through ds->swapInvChars and the token maps built by
+     * makeTokenMap().
+     *
+     * length<0 requests preflighting: the body is not written; the algorithmic
+     * ranges are only walked to find where the data ends.
+     * Otherwise length is the number of bytes available at inData and the
+     * swapped data is written to outData. inData and outData may point to the
+     * same buffer; the token table goes through a temporary array for that.
+     *
+     * Returns headerSize plus the offset just past the last algorithmic range.
+     * Returns 0 when pErrorCode is NULL, when the header swap fails, or when
+     * one of the explicit checks below fails.
+     * NOTE(review): not every ds callback result is checked before the final
+     * return, so callers should test *pErrorCode rather than the return value.
+     *
+     * Error codes set directly in this function:
+     *   U_UNSUPPORTED_ERROR        wrong data format/version, or unknown
+     *                              algorithmic range type
+     *   U_INDEX_OUTOFBOUNDS_ERROR  too few bytes for the data
+     *   U_MEMORY_ALLOCATION_ERROR  temporary token array could not be allocated
+     */
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
+             offset, i, count, stringsCount;
+
+    const AlgorithmicRange *inRange;
+    AlgorithmicRange *outRange;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4); /* the UDataInfo is read at byte offset 4 of the input */
+    if(!(
+        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x61 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==1
+    )) {
+        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize; /* from here on, all offsets are relative to the end of the header */
+    outBytes=(uint8_t *)outData+headerSize;
+    if(length<0) {
+        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]); /* fourth of the initial offset words */
+    } else {
+        length-=headerSize; /* length now counts only the bytes after the header */
+        if( length<20 ||
+            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
+        ) {
+            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    if(length<0) {
+        /*
+         * preflighting: iterate through algorithmic ranges
+         * only to advance offset to the end of the last range;
+         * nothing is written and no bounds are checked on this path
+         */
+        offset=algNamesOffset;
+        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+        offset+=4;
+
+        for(i=0; i<count; ++i) {
+            inRange=(const AlgorithmicRange *)(inBytes+offset);
+            offset+=ds->readUInt16(inRange->size);
+        }
+    } else {
+        /* swap data */
+        const uint16_t *p;
+        uint16_t *q, *temp;
+
+        int16_t tokens[512]; /* [0..255] feeds map[], [256..511] feeds trailMap[] (see the makeTokenMap() calls below) */
+        uint16_t tokenCount;
+
+        uint8_t map[256], trailMap[256];
+
+        /* copy the data for inaccessible bytes */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, length);
+        }
+
+        /* the initial 4 offsets first */
+        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
+        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
+        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
+        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
+
+        /*
+         * now the tokens table
+         * it needs to be permutated along with the compressed name strings
+         */
+        p=(const uint16_t *)(inBytes+16);
+        q=(uint16_t *)(outBytes+16);
+
+        /* read and swap the tokenCount */
+        tokenCount=ds->readUInt16(*p);
+        ds->swapArray16(ds, p, 2, q, pErrorCode);
+        ++p;
+        ++q;
+
+        /*
+         * read the first 512 tokens and make the token maps
+         * map[] is built from tokens[0..255] and trailMap[] from tokens[256..511];
+         * if the first makeTokenMap() call fails, the second one returns at once
+         * because makeTokenMap() tests *pErrorCode on entry
+         */
+        if(tokenCount<=512) {
+            count=tokenCount;
+        } else {
+            count=512;
+        }
+        for(i=0; i<count; ++i) {
+            tokens[i]=udata_readInt16(ds, p[i]);
+        }
+        for(; i<512; ++i) {
+            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
+        }
+        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
+        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return 0; /* temp has not been allocated yet, so there is nothing to free */
+        }
+
+        /*
+         * swap and permutate the tokens
+         * go through a temporary array to support in-place swapping
+         */
+        temp=(uint16_t *)uprv_malloc(tokenCount*2); /* one uint16_t per token */
+        if(temp==NULL) {
+            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
+                             tokenCount);
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+
+        /* swap and permutate single-/lead-byte tokens */
+        for(i=0; i<tokenCount && i<256; ++i) {
+            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode); /* token i lands at index map[i] */
+        }
+
+        /*
+         * swap and permutate trail-byte tokens
+         * the upper part of the index (i&0xffffff00) is kept and only the low
+         * byte is permuted through trailMap[], for every block at 256 and above
+         */
+        for(; i<tokenCount; ++i) {
+            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
+        }
+
+        /* copy the result into the output and free the temporary array */
+        uprv_memcpy(q, temp, tokenCount*2);
+        uprv_free(temp);
+
+        /*
+         * swap the token strings but not a possible padding byte after
+         * the terminating NUL of the last string
+         */
+        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
+                                    outBytes+tokenStringOffset, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            udata_printError(ds, "uchar_swapNames(token strings) failed\n");
+            return 0;
+        }
+
+        /*
+         * swap the group table
+         * (1+count*3) 16-bit units: the count itself plus 3 units per group
+         */
+        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
+        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
+                           outBytes+groupsOffset, pErrorCode);
+
+        /*
+         * swap the group strings
+         * swap the string bytes but not the nibble-encoded string lengths
+         * (skipped entirely when both charset families are the same)
+         */
+        if(ds->inCharset!=ds->outCharset) {
+            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
+
+            const uint8_t *inStrings, *nextInStrings;
+            uint8_t *outStrings;
+
+            uint8_t c;
+
+            inStrings=inBytes+groupStringOffset;
+            outStrings=outBytes+groupStringOffset;
+
+            stringsCount=algNamesOffset-groupStringOffset; /* bytes between the group strings and the algorithmic names */
+
+            /*
+             * iterate through string groups until only a few padding bytes are left
+             * NOTE(review): 32 presumably corresponds to LINES_PER_GROUP - confirm
+             */
+            while(stringsCount>32) {
+                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
+
+                /* move past the length bytes */
+                stringsCount-=(uint32_t)(nextInStrings-inStrings);
+                outStrings+=nextInStrings-inStrings;
+                inStrings=nextInStrings;
+
+                count=offsets[31]+lengths[31]; /* total number of string bytes in this group; NOTE(review): 31 presumably is LINES_PER_GROUP-1 - confirm */
+                stringsCount-=count;
+
+                /* swap the string bytes using map[] and trailMap[] */
+                while(count>0) {
+                    c=*inStrings++;
+                    *outStrings++=map[c];
+                    if(tokens[c]!=-2) {
+                        --count;
+                    } else {
+                        /* token lead byte: swap the trail byte, too */
+                        *outStrings++=trailMap[*inStrings++];
+                        count-=2; /* NOTE(review): count is unsigned; a lead byte as the last counted byte would wrap it - relies on well-formed data */
+                    }
+                }
+            }
+        }
+
+        /* swap the algorithmic ranges */
+        offset=algNamesOffset;
+        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
+        offset+=4;
+
+        for(i=0; i<count; ++i) {
+            if(offset>(uint32_t)length) {
+                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
+                                 length, i);
+                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                return 0;
+            }
+
+            inRange=(const AlgorithmicRange *)(inBytes+offset);
+            outRange=(AlgorithmicRange *)(outBytes+offset);
+            offset+=ds->readUInt16(inRange->size); /* read the size before it is swapped below (matters when swapping in place) */
+
+            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode); /* the first 8 bytes of the range as two 32-bit units */
+            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
+            switch(inRange->type) { /* NOTE(review): type and variant are read after the swaps above; presumably single bytes that swapping leaves untouched - confirm against AlgorithmicRange */
+            case 0:
+                /* swap prefix string */
+                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
+                                    outRange+1, pErrorCode);
+                if(U_FAILURE(*pErrorCode)) {
+                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
+                                     i);
+                    return 0;
+                }
+                break;
+            case 1:
+                {
+                    /* swap factors and the prefix and factor strings */
+                    uint32_t factorsCount;
+
+                    factorsCount=inRange->variant;
+                    p=(const uint16_t *)(inRange+1); /* the 16-bit factors start right after the range struct */
+                    q=(uint16_t *)(outRange+1);
+                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
+
+                    /* swap the strings, up to the last terminating NUL */
+                    p+=factorsCount;
+                    q+=factorsCount;
+                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p); /* bytes from the end of the factors to the start of the next range */
+                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
+                        --stringsCount;
+                    }
+                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode); /* NOTE(review): unlike case 0, the result is not checked here */
+                }
+                break;
+            default:
+                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
+                                 inRange->type, i);
+                *pErrorCode=U_UNSUPPORTED_ERROR;
+                return 0;
+            }
+        }
+    }
+
+    return headerSize+(int32_t)offset; /* header plus body up to the end of the last algorithmic range */
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
author	neksard <neksard@yandex-team.ru>	2022-02-10 16:45:33 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:33 +0300
commit	1d9c550e7c38e051d7961f576013a482003a70d9 (patch)
tree	b2cc84ee7850122e7ccf51d0ea21e4fa7e7a5685 /contrib/libs/icu/common/unames.cpp
parent	8f7cf138264e0caa318144bf8a2c950e0b0a8593 (diff)
download	ydb-1d9c550e7c38e051d7961f576013a482003a70d9.tar.gz