diff options
author | mcheshkov <mcheshkov@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:16 +0300 |
commit | 1312621288956f199a5bd5342b0133d4395fa725 (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /contrib/libs/icu/common/ucnvisci.cpp | |
parent | e9d19cec64684c9c1e6b0c98297e5b895cf904fe (diff) | |
download | ydb-1312621288956f199a5bd5342b0133d4395fa725.tar.gz |
Restoring authorship annotation for <mcheshkov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/icu/common/ucnvisci.cpp')
-rw-r--r-- | contrib/libs/icu/common/ucnvisci.cpp | 3270 |
1 files changed, 1635 insertions, 1635 deletions
diff --git a/contrib/libs/icu/common/ucnvisci.cpp b/contrib/libs/icu/common/ucnvisci.cpp index ed76af1474..44a7c05a3c 100644 --- a/contrib/libs/icu/common/ucnvisci.cpp +++ b/contrib/libs/icu/common/ucnvisci.cpp @@ -1,1635 +1,1635 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2000-2016, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* file name: ucnvisci.c -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2001JUN26 -* created by: Ram Viswanadha -* -* Date Name Description -* 24/7/2001 Ram Added support for EXT character handling -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION - -#include "unicode/ucnv.h" -#include "unicode/ucnv_cb.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "ucnv_bld.h" -#include "ucnv_cnv.h" -#include "cstring.h" -#include "uassert.h" - -#define UCNV_OPTIONS_VERSION_MASK 0xf -#define NUKTA 0x093c -#define HALANT 0x094d -#define ZWNJ 0x200c /* Zero Width Non Joiner */ -#define ZWJ 0x200d /* Zero width Joiner */ -#define INVALID_CHAR 0xffff -#define ATR 0xEF /* Attribute code */ -#define EXT 0xF0 /* Extension code */ -#define DANDA 0x0964 -#define DOUBLE_DANDA 0x0965 -#define ISCII_NUKTA 0xE9 -#define ISCII_HALANT 0xE8 -#define ISCII_DANDA 0xEA -#define ISCII_INV 0xD9 -#define ISCII_VOWEL_SIGN_E 0xE0 -#define INDIC_BLOCK_BEGIN 0x0900 -#define INDIC_BLOCK_END 0x0D7F -#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) -#define VOCALLIC_RR 0x0931 -#define LF 0x0A -#define ASCII_END 0xA0 -#define NO_CHAR_MARKER 0xFFFE -#define TELUGU_DELTA DELTA * TELUGU -#define DEV_ABBR_SIGN 0x0970 -#define DEV_ANUDATTA 0x0952 -#define EXT_RANGE_BEGIN 0xA1 -#define EXT_RANGE_END 0xEE - -#define PNJ_DELTA 0x0100 -#define PNJ_BINDI 0x0A02 -#define PNJ_TIPPI 0x0A70 -#define PNJ_SIGN_VIRAMA 0x0A4D -#define PNJ_ADHAK 0x0A71 -#define PNJ_HA 0x0A39 -#define PNJ_RRA 0x0A5C - -typedef enum { - DEVANAGARI =0, - BENGALI, - GURMUKHI, - GUJARATI, - ORIYA, - TAMIL, - TELUGU, - KANNADA, - MALAYALAM, - DELTA=0x80 -}UniLang; - -/** - * Enumeration for switching code pages if <ATR>+<one of below values> - * is encountered - */ -typedef enum { - DEF = 0x40, - RMN = 0x41, - DEV = 0x42, - BNG = 0x43, - TML = 0x44, - TLG = 0x45, - ASM = 0x46, - ORI = 0x47, - KND = 0x48, - MLM = 0x49, - GJR = 0x4A, - PNJ = 0x4B, - ARB = 0x71, - PES = 0x72, - URD = 0x73, - SND = 0x74, - KSM = 0x75, - PST = 0x76 -}ISCIILang; - -typedef enum { - DEV_MASK =0x80, - PNJ_MASK =0x40, - GJR_MASK =0x20, - ORI_MASK =0x10, - BNG_MASK =0x08, - KND_MASK =0x04, - MLM_MASK =0x02, - TML_MASK =0x01, - ZERO =0x00 -}MaskEnum; - -#define ISCII_CNV_PREFIX "ISCII,version=" - -typedef struct { - UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ - UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ - uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ - uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ - uint16_t currentDeltaToUnicode; /* current delta in Indic block */ - MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ - MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ - MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ - UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ - UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ - char name[sizeof(ISCII_CNV_PREFIX) + 1]; - UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */ -} UConverterDataISCII; - -typedef struct LookupDataStruct { - UniLang uniLang; - MaskEnum maskEnum; - ISCIILang isciiLang; -} LookupDataStruct; - -static const LookupDataStruct lookupInitialData[]={ - { DEVANAGARI, DEV_MASK, DEV }, - { BENGALI, BNG_MASK, BNG }, - { GURMUKHI, PNJ_MASK, PNJ }, - { GUJARATI, GJR_MASK, GJR }, - { ORIYA, ORI_MASK, ORI }, - { TAMIL, TML_MASK, TML }, - { TELUGU, KND_MASK, TLG }, - { KANNADA, KND_MASK, KND }, - { MALAYALAM, MLM_MASK, MLM } -}; - -/* - * For special handling of certain Gurmukhi characters. - * Bit 0 (value 1): PNJ consonant - * Bit 1 (value 2): PNJ Bindi Tippi - */ -static const uint8_t pnjMap[80] = { - /* 0A00..0A0F */ - 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, - /* 0A10..0A1F */ - 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - /* 0A20..0A2F */ - 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, - /* 0A30..0A3F */ - 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, - /* 0A40..0A4F */ - 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - -static UBool -isPNJConsonant(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] & 1); - } -} - -static UBool -isPNJBindiTippi(UChar32 c) { - if (c < 0xa00 || 0xa50 <= c) { - return FALSE; - } else { - return (UBool)(pnjMap[c - 0xa00] >> 1); - } -} -U_CDECL_BEGIN -static void U_CALLCONV -_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { - if(pArgs->onlyTestIsLoadable) { - return; - } - - cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); - - if (cnv->extraInfo != NULL) { - int32_t len=0; - UConverterDataISCII *converterData= - (UConverterDataISCII *) cnv->extraInfo; - converterData->contextCharToUnicode=NO_CHAR_MARKER; - cnv->toUnicodeStatus = missingCharMarker; - converterData->contextCharFromUnicode=0x0000; - converterData->resetToDefaultToUnicode=FALSE; - /* check if the version requested is supported */ - if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { - /* initialize state variables */ - converterData->currentDeltaFromUnicode - = converterData->currentDeltaToUnicode - = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); - - converterData->currentMaskFromUnicode - = converterData->currentMaskToUnicode - = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; - - converterData->isFirstBuffer=TRUE; - (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); - len = (int32_t)uprv_strlen(converterData->name); - converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); - converterData->name[len+1]=0; - - converterData->prevToUnicodeStatus = 0x0000; - } else { - uprv_free(cnv->extraInfo); - cnv->extraInfo = NULL; - *errorCode = U_ILLEGAL_ARGUMENT_ERROR; - } - - } else { - *errorCode =U_MEMORY_ALLOCATION_ERROR; - } -} - -static void U_CALLCONV -_ISCIIClose(UConverter *cnv) { - if (cnv->extraInfo!=NULL) { - if (!cnv->isExtraLocal) { - uprv_free(cnv->extraInfo); - } - cnv->extraInfo=NULL; - } -} - -static const char* U_CALLCONV -_ISCIIgetName(const UConverter* cnv) { - if (cnv->extraInfo) { - UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; - return myData->name; - } - return NULL; -} - -static void U_CALLCONV -_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { - UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); - if (choice<=UCNV_RESET_TO_UNICODE) { - cnv->toUnicodeStatus = missingCharMarker; - cnv->mode=0; - data->currentDeltaToUnicode=data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->contextCharToUnicode=NO_CHAR_MARKER; - data->prevToUnicodeStatus = 0x0000; - } - if (choice!=UCNV_RESET_TO_UNICODE) { - cnv->fromUChar32=0x0000; - data->contextCharFromUnicode=0x00; - data->currentMaskFromUnicode=data->defMaskToUnicode; - data->currentDeltaFromUnicode=data->defDeltaToUnicode; - data->isFirstBuffer=TRUE; - data->resetToDefaultToUnicode=FALSE; - } -} - -/** - * The values in validity table are indexed by the lower bits of Unicode - * range 0x0900 - 0x09ff. The values have a structure like: - * --------------------------------------------------------------- - * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | - * | | | | | ASM | KND | | | - * --------------------------------------------------------------- - * If a code point is valid in a particular script - * then that bit is turned on - * - * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for - * to represent these languages - * - * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case - * and combine and use 1 bit to represent these languages. - * - * TODO: It is probably easier to understand and maintain to change this - * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. - */ - -static const uint8_t validityTable[128] = { -/* This state table is tool generated please do not edit unless you know exactly what you are doing */ -/* Note: This table was edited to mirror the Windows XP implementation */ -/*ISCII:Valid:Unicode */ -/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , -/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , -/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , -/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , -/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , -/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , -/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , -/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , -/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , -/* - * The length of the array is 128 to provide values for 0x900..0x97f. - * The last 15 entries for 0x971..0x97f of the validity table are all zero - * because no Indic script uses such Unicode code points. - */ -/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO -}; - -static const uint16_t fromUnicodeTable[128]={ - 0x00a0 ,/* 0x0900 */ - 0x00a1 ,/* 0x0901 */ - 0x00a2 ,/* 0x0902 */ - 0x00a3 ,/* 0x0903 */ - 0xa4e0 ,/* 0x0904 */ - 0x00a4 ,/* 0x0905 */ - 0x00a5 ,/* 0x0906 */ - 0x00a6 ,/* 0x0907 */ - 0x00a7 ,/* 0x0908 */ - 0x00a8 ,/* 0x0909 */ - 0x00a9 ,/* 0x090a */ - 0x00aa ,/* 0x090b */ - 0xA6E9 ,/* 0x090c */ - 0x00ae ,/* 0x090d */ - 0x00ab ,/* 0x090e */ - 0x00ac ,/* 0x090f */ - 0x00ad ,/* 0x0910 */ - 0x00b2 ,/* 0x0911 */ - 0x00af ,/* 0x0912 */ - 0x00b0 ,/* 0x0913 */ - 0x00b1 ,/* 0x0914 */ - 0x00b3 ,/* 0x0915 */ - 0x00b4 ,/* 0x0916 */ - 0x00b5 ,/* 0x0917 */ - 0x00b6 ,/* 0x0918 */ - 0x00b7 ,/* 0x0919 */ - 0x00b8 ,/* 0x091a */ - 0x00b9 ,/* 0x091b */ - 0x00ba ,/* 0x091c */ - 0x00bb ,/* 0x091d */ - 0x00bc ,/* 0x091e */ - 0x00bd ,/* 0x091f */ - 0x00be ,/* 0x0920 */ - 0x00bf ,/* 0x0921 */ - 0x00c0 ,/* 0x0922 */ - 0x00c1 ,/* 0x0923 */ - 0x00c2 ,/* 0x0924 */ - 0x00c3 ,/* 0x0925 */ - 0x00c4 ,/* 0x0926 */ - 0x00c5 ,/* 0x0927 */ - 0x00c6 ,/* 0x0928 */ - 0x00c7 ,/* 0x0929 */ - 0x00c8 ,/* 0x092a */ - 0x00c9 ,/* 0x092b */ - 0x00ca ,/* 0x092c */ - 0x00cb ,/* 0x092d */ - 0x00cc ,/* 0x092e */ - 0x00cd ,/* 0x092f */ - 0x00cf ,/* 0x0930 */ - 0x00d0 ,/* 0x0931 */ - 0x00d1 ,/* 0x0932 */ - 0x00d2 ,/* 0x0933 */ - 0x00d3 ,/* 0x0934 */ - 0x00d4 ,/* 0x0935 */ - 0x00d5 ,/* 0x0936 */ - 0x00d6 ,/* 0x0937 */ - 0x00d7 ,/* 0x0938 */ - 0x00d8 ,/* 0x0939 */ - 0xFFFF ,/* 0x093A */ - 0xFFFF ,/* 0x093B */ - 0x00e9 ,/* 0x093c */ - 0xEAE9 ,/* 0x093d */ - 0x00da ,/* 0x093e */ - 0x00db ,/* 0x093f */ - 0x00dc ,/* 0x0940 */ - 0x00dd ,/* 0x0941 */ - 0x00de ,/* 0x0942 */ - 0x00df ,/* 0x0943 */ - 0xDFE9 ,/* 0x0944 */ - 0x00e3 ,/* 0x0945 */ - 0x00e0 ,/* 0x0946 */ - 0x00e1 ,/* 0x0947 */ - 0x00e2 ,/* 0x0948 */ - 0x00e7 ,/* 0x0949 */ - 0x00e4 ,/* 0x094a */ - 0x00e5 ,/* 0x094b */ - 0x00e6 ,/* 0x094c */ - 0x00e8 ,/* 0x094d */ - 0x00ec ,/* 0x094e */ - 0x00ed ,/* 0x094f */ - 0xA1E9 ,/* 0x0950 */ /* OM Symbol */ - 0xFFFF ,/* 0x0951 */ - 0xF0B8 ,/* 0x0952 */ - 0xFFFF ,/* 0x0953 */ - 0xFFFF ,/* 0x0954 */ - 0xFFFF ,/* 0x0955 */ - 0xFFFF ,/* 0x0956 */ - 0xFFFF ,/* 0x0957 */ - 0xb3e9 ,/* 0x0958 */ - 0xb4e9 ,/* 0x0959 */ - 0xb5e9 ,/* 0x095a */ - 0xbae9 ,/* 0x095b */ - 0xbfe9 ,/* 0x095c */ - 0xC0E9 ,/* 0x095d */ - 0xc9e9 ,/* 0x095e */ - 0x00ce ,/* 0x095f */ - 0xAAe9 ,/* 0x0960 */ - 0xA7E9 ,/* 0x0961 */ - 0xDBE9 ,/* 0x0962 */ - 0xDCE9 ,/* 0x0963 */ - 0x00ea ,/* 0x0964 */ - 0xeaea ,/* 0x0965 */ - 0x00f1 ,/* 0x0966 */ - 0x00f2 ,/* 0x0967 */ - 0x00f3 ,/* 0x0968 */ - 0x00f4 ,/* 0x0969 */ - 0x00f5 ,/* 0x096a */ - 0x00f6 ,/* 0x096b */ - 0x00f7 ,/* 0x096c */ - 0x00f8 ,/* 0x096d */ - 0x00f9 ,/* 0x096e */ - 0x00fa ,/* 0x096f */ - 0xF0BF ,/* 0x0970 */ - 0xFFFF ,/* 0x0971 */ - 0xFFFF ,/* 0x0972 */ - 0xFFFF ,/* 0x0973 */ - 0xFFFF ,/* 0x0974 */ - 0xFFFF ,/* 0x0975 */ - 0xFFFF ,/* 0x0976 */ - 0xFFFF ,/* 0x0977 */ - 0xFFFF ,/* 0x0978 */ - 0xFFFF ,/* 0x0979 */ - 0xFFFF ,/* 0x097a */ - 0xFFFF ,/* 0x097b */ - 0xFFFF ,/* 0x097c */ - 0xFFFF ,/* 0x097d */ - 0xFFFF ,/* 0x097e */ - 0xFFFF ,/* 0x097f */ -}; -static const uint16_t toUnicodeTable[256]={ - 0x0000,/* 0x00 */ - 0x0001,/* 0x01 */ - 0x0002,/* 0x02 */ - 0x0003,/* 0x03 */ - 0x0004,/* 0x04 */ - 0x0005,/* 0x05 */ - 0x0006,/* 0x06 */ - 0x0007,/* 0x07 */ - 0x0008,/* 0x08 */ - 0x0009,/* 0x09 */ - 0x000a,/* 0x0a */ - 0x000b,/* 0x0b */ - 0x000c,/* 0x0c */ - 0x000d,/* 0x0d */ - 0x000e,/* 0x0e */ - 0x000f,/* 0x0f */ - 0x0010,/* 0x10 */ - 0x0011,/* 0x11 */ - 0x0012,/* 0x12 */ - 0x0013,/* 0x13 */ - 0x0014,/* 0x14 */ - 0x0015,/* 0x15 */ - 0x0016,/* 0x16 */ - 0x0017,/* 0x17 */ - 0x0018,/* 0x18 */ - 0x0019,/* 0x19 */ - 0x001a,/* 0x1a */ - 0x001b,/* 0x1b */ - 0x001c,/* 0x1c */ - 0x001d,/* 0x1d */ - 0x001e,/* 0x1e */ - 0x001f,/* 0x1f */ - 0x0020,/* 0x20 */ - 0x0021,/* 0x21 */ - 0x0022,/* 0x22 */ - 0x0023,/* 0x23 */ - 0x0024,/* 0x24 */ - 0x0025,/* 0x25 */ - 0x0026,/* 0x26 */ - 0x0027,/* 0x27 */ - 0x0028,/* 0x28 */ - 0x0029,/* 0x29 */ - 0x002a,/* 0x2a */ - 0x002b,/* 0x2b */ - 0x002c,/* 0x2c */ - 0x002d,/* 0x2d */ - 0x002e,/* 0x2e */ - 0x002f,/* 0x2f */ - 0x0030,/* 0x30 */ - 0x0031,/* 0x31 */ - 0x0032,/* 0x32 */ - 0x0033,/* 0x33 */ - 0x0034,/* 0x34 */ - 0x0035,/* 0x35 */ - 0x0036,/* 0x36 */ - 0x0037,/* 0x37 */ - 0x0038,/* 0x38 */ - 0x0039,/* 0x39 */ - 0x003A,/* 0x3A */ - 0x003B,/* 0x3B */ - 0x003c,/* 0x3c */ - 0x003d,/* 0x3d */ - 0x003e,/* 0x3e */ - 0x003f,/* 0x3f */ - 0x0040,/* 0x40 */ - 0x0041,/* 0x41 */ - 0x0042,/* 0x42 */ - 0x0043,/* 0x43 */ - 0x0044,/* 0x44 */ - 0x0045,/* 0x45 */ - 0x0046,/* 0x46 */ - 0x0047,/* 0x47 */ - 0x0048,/* 0x48 */ - 0x0049,/* 0x49 */ - 0x004a,/* 0x4a */ - 0x004b,/* 0x4b */ - 0x004c,/* 0x4c */ - 0x004d,/* 0x4d */ - 0x004e,/* 0x4e */ - 0x004f,/* 0x4f */ - 0x0050,/* 0x50 */ - 0x0051,/* 0x51 */ - 0x0052,/* 0x52 */ - 0x0053,/* 0x53 */ - 0x0054,/* 0x54 */ - 0x0055,/* 0x55 */ - 0x0056,/* 0x56 */ - 0x0057,/* 0x57 */ - 0x0058,/* 0x58 */ - 0x0059,/* 0x59 */ - 0x005a,/* 0x5a */ - 0x005b,/* 0x5b */ - 0x005c,/* 0x5c */ - 0x005d,/* 0x5d */ - 0x005e,/* 0x5e */ - 0x005f,/* 0x5f */ - 0x0060,/* 0x60 */ - 0x0061,/* 0x61 */ - 0x0062,/* 0x62 */ - 0x0063,/* 0x63 */ - 0x0064,/* 0x64 */ - 0x0065,/* 0x65 */ - 0x0066,/* 0x66 */ - 0x0067,/* 0x67 */ - 0x0068,/* 0x68 */ - 0x0069,/* 0x69 */ - 0x006a,/* 0x6a */ - 0x006b,/* 0x6b */ - 0x006c,/* 0x6c */ - 0x006d,/* 0x6d */ - 0x006e,/* 0x6e */ - 0x006f,/* 0x6f */ - 0x0070,/* 0x70 */ - 0x0071,/* 0x71 */ - 0x0072,/* 0x72 */ - 0x0073,/* 0x73 */ - 0x0074,/* 0x74 */ - 0x0075,/* 0x75 */ - 0x0076,/* 0x76 */ - 0x0077,/* 0x77 */ - 0x0078,/* 0x78 */ - 0x0079,/* 0x79 */ - 0x007a,/* 0x7a */ - 0x007b,/* 0x7b */ - 0x007c,/* 0x7c */ - 0x007d,/* 0x7d */ - 0x007e,/* 0x7e */ - 0x007f,/* 0x7f */ - 0x0080,/* 0x80 */ - 0x0081,/* 0x81 */ - 0x0082,/* 0x82 */ - 0x0083,/* 0x83 */ - 0x0084,/* 0x84 */ - 0x0085,/* 0x85 */ - 0x0086,/* 0x86 */ - 0x0087,/* 0x87 */ - 0x0088,/* 0x88 */ - 0x0089,/* 0x89 */ - 0x008a,/* 0x8a */ - 0x008b,/* 0x8b */ - 0x008c,/* 0x8c */ - 0x008d,/* 0x8d */ - 0x008e,/* 0x8e */ - 0x008f,/* 0x8f */ - 0x0090,/* 0x90 */ - 0x0091,/* 0x91 */ - 0x0092,/* 0x92 */ - 0x0093,/* 0x93 */ - 0x0094,/* 0x94 */ - 0x0095,/* 0x95 */ - 0x0096,/* 0x96 */ - 0x0097,/* 0x97 */ - 0x0098,/* 0x98 */ - 0x0099,/* 0x99 */ - 0x009a,/* 0x9a */ - 0x009b,/* 0x9b */ - 0x009c,/* 0x9c */ - 0x009d,/* 0x9d */ - 0x009e,/* 0x9e */ - 0x009f,/* 0x9f */ - 0x00A0,/* 0xa0 */ - 0x0901,/* 0xa1 */ - 0x0902,/* 0xa2 */ - 0x0903,/* 0xa3 */ - 0x0905,/* 0xa4 */ - 0x0906,/* 0xa5 */ - 0x0907,/* 0xa6 */ - 0x0908,/* 0xa7 */ - 0x0909,/* 0xa8 */ - 0x090a,/* 0xa9 */ - 0x090b,/* 0xaa */ - 0x090e,/* 0xab */ - 0x090f,/* 0xac */ - 0x0910,/* 0xad */ - 0x090d,/* 0xae */ - 0x0912,/* 0xaf */ - 0x0913,/* 0xb0 */ - 0x0914,/* 0xb1 */ - 0x0911,/* 0xb2 */ - 0x0915,/* 0xb3 */ - 0x0916,/* 0xb4 */ - 0x0917,/* 0xb5 */ - 0x0918,/* 0xb6 */ - 0x0919,/* 0xb7 */ - 0x091a,/* 0xb8 */ - 0x091b,/* 0xb9 */ - 0x091c,/* 0xba */ - 0x091d,/* 0xbb */ - 0x091e,/* 0xbc */ - 0x091f,/* 0xbd */ - 0x0920,/* 0xbe */ - 0x0921,/* 0xbf */ - 0x0922,/* 0xc0 */ - 0x0923,/* 0xc1 */ - 0x0924,/* 0xc2 */ - 0x0925,/* 0xc3 */ - 0x0926,/* 0xc4 */ - 0x0927,/* 0xc5 */ - 0x0928,/* 0xc6 */ - 0x0929,/* 0xc7 */ - 0x092a,/* 0xc8 */ - 0x092b,/* 0xc9 */ - 0x092c,/* 0xca */ - 0x092d,/* 0xcb */ - 0x092e,/* 0xcc */ - 0x092f,/* 0xcd */ - 0x095f,/* 0xce */ - 0x0930,/* 0xcf */ - 0x0931,/* 0xd0 */ - 0x0932,/* 0xd1 */ - 0x0933,/* 0xd2 */ - 0x0934,/* 0xd3 */ - 0x0935,/* 0xd4 */ - 0x0936,/* 0xd5 */ - 0x0937,/* 0xd6 */ - 0x0938,/* 0xd7 */ - 0x0939,/* 0xd8 */ - 0x200D,/* 0xd9 */ - 0x093e,/* 0xda */ - 0x093f,/* 0xdb */ - 0x0940,/* 0xdc */ - 0x0941,/* 0xdd */ - 0x0942,/* 0xde */ - 0x0943,/* 0xdf */ - 0x0946,/* 0xe0 */ - 0x0947,/* 0xe1 */ - 0x0948,/* 0xe2 */ - 0x0945,/* 0xe3 */ - 0x094a,/* 0xe4 */ - 0x094b,/* 0xe5 */ - 0x094c,/* 0xe6 */ - 0x0949,/* 0xe7 */ - 0x094d,/* 0xe8 */ - 0x093c,/* 0xe9 */ - 0x0964,/* 0xea */ - 0xFFFF,/* 0xeb */ - 0xFFFF,/* 0xec */ - 0xFFFF,/* 0xed */ - 0xFFFF,/* 0xee */ - 0xFFFF,/* 0xef */ - 0xFFFF,/* 0xf0 */ - 0x0966,/* 0xf1 */ - 0x0967,/* 0xf2 */ - 0x0968,/* 0xf3 */ - 0x0969,/* 0xf4 */ - 0x096a,/* 0xf5 */ - 0x096b,/* 0xf6 */ - 0x096c,/* 0xf7 */ - 0x096d,/* 0xf8 */ - 0x096e,/* 0xf9 */ - 0x096f,/* 0xfa */ - 0xFFFF,/* 0xfb */ - 0xFFFF,/* 0xfc */ - 0xFFFF,/* 0xfd */ - 0xFFFF,/* 0xfe */ - 0xFFFF /* 0xff */ -}; - -static const uint16_t vowelSignESpecialCases[][2]={ - { 2 /*length of array*/ , 0 }, - { 0xA4 , 0x0904 }, -}; - -static const uint16_t nuktaSpecialCases[][2]={ - { 16 /*length of array*/ , 0 }, - { 0xA6 , 0x090c }, - { 0xEA , 0x093D }, - { 0xDF , 0x0944 }, - { 0xA1 , 0x0950 }, - { 0xb3 , 0x0958 }, - { 0xb4 , 0x0959 }, - { 0xb5 , 0x095a }, - { 0xba , 0x095b }, - { 0xbf , 0x095c }, - { 0xC0 , 0x095d }, - { 0xc9 , 0x095e }, - { 0xAA , 0x0960 }, - { 0xA7 , 0x0961 }, - { 0xDB , 0x0962 }, - { 0xDC , 0x0963 }, -}; - - -#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \ - int32_t offset = (int32_t)(source - args->source-1); \ - /* write the targetUniChar to target */ \ - if(target < targetLimit){ \ - if(targetByteUnit <= 0xFF){ \ - *(target)++ = (uint8_t)(targetByteUnit); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - }else{ \ - if (targetByteUnit > 0xFFFF) { \ - *(target)++ = (uint8_t)(targetByteUnit>>16); \ - if (offsets) { \ - --offset; \ - *(offsets++) = offset; \ - } \ - } \ - if (!(target < targetLimit)) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)(targetByteUnit >> 8); \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t)targetByteUnit; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } else { \ - *(target)++ = (uint8_t)(targetByteUnit>>8); \ - if(offsets){ \ - *(offsets++) = offset; \ - } \ - if(target < targetLimit){ \ - *(target)++ = (uint8_t) targetByteUnit; \ - if(offsets){ \ - *(offsets++) = offset ; \ - } \ - }else{ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ - (uint8_t) (targetByteUnit); \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ - } \ - } \ - }else{ \ - if (targetByteUnit & 0xFF0000) { \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>16); \ - } \ - if(targetByteUnit & 0xFF00){ \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit >>8); \ - } \ - args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ - (uint8_t) (targetByteUnit); \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} UPRV_BLOCK_MACRO_END - -/* Rules: - * Explicit Halant : - * <HALANT> + <ZWNJ> - * Soft Halant : - * <HALANT> + <ZWJ> - */ -static void U_CALLCONV -UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( - UConverterFromUnicodeArgs * args, UErrorCode * err) { - const UChar *source = args->source; - const UChar *sourceLimit = args->sourceLimit; - unsigned char *target = (unsigned char *) args->target; - unsigned char *targetLimit = (unsigned char *) args->targetLimit; - int32_t* offsets = args->offsets; - uint32_t targetByteUnit = 0x0000; - UChar32 sourceChar = 0x0000; - UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ - UConverterDataISCII *converterData; - uint16_t newDelta=0; - uint16_t range = 0; - UBool deltaChanged = FALSE; - - if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - /* initialize data */ - converterData=(UConverterDataISCII*)args->converter->extraInfo; - newDelta=converterData->currentDeltaFromUnicode; - range = (uint16_t)(newDelta/DELTA); - - if ((sourceChar = args->converter->fromUChar32)!=0) { - goto getTrail; - } - - /*writing the char to the output stream */ - while (source < sourceLimit) { - /* Write the language code following LF only if LF is not the last character. */ - if (args->converter->fromUnicodeStatus == LF) { - targetByteUnit = ATR<<8; - targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; - args->converter->fromUnicodeStatus = 0x0000; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } - - sourceChar = *source++; - tempContextFromUnicode = converterData->contextCharFromUnicode; - - targetByteUnit = missingCharMarker; - - /*check if input is in ASCII and C0 control codes range*/ - if (sourceChar <= ASCII_END) { - args->converter->fromUnicodeStatus = sourceChar; - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); - if (U_FAILURE(*err)) { - break; - } - continue; - } - switch (sourceChar) { - case ZWNJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - converterData->contextCharFromUnicode = 0x00; - targetByteUnit = ISCII_HALANT; - } else { - /* consume ZWNJ and continue */ - converterData->contextCharFromUnicode = 0x00; - continue; - } - break; - case ZWJ: - /* contextChar has HALANT */ - if (converterData->contextCharFromUnicode) { - targetByteUnit = ISCII_NUKTA; - } else { - targetByteUnit =ISCII_INV; - } - converterData->contextCharFromUnicode = 0x00; - break; - default: - /* is the sourceChar in the INDIC_RANGE? */ - if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { - /* Danda and Double Danda are valid in Northern scripts.. since Unicode - * does not include these codepoints in all Northern scrips we need to - * filter them out - */ - if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { - /* find out to which block the souceChar belongs*/ - range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); - newDelta =(uint16_t)(range*DELTA); - - /* Now are we in the same block as the previous? */ - if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { - converterData->currentDeltaFromUnicode = newDelta; - converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; - deltaChanged =TRUE; - converterData->isFirstBuffer=FALSE; - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { - if (sourceChar == PNJ_TIPPI) { - /* Make sure Tippi is converterd to Bindi. */ - sourceChar = PNJ_BINDI; - } else if (sourceChar == PNJ_ADHAK) { - /* This is for consonant cluster handling. */ - converterData->contextCharFromUnicode = PNJ_ADHAK; - } - - } - /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ - /* now subtract the new delta from sourceChar*/ - sourceChar -= converterData->currentDeltaFromUnicode; - } - - /* get the target byte unit */ - targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; - - /* is the code point valid in current script? */ - if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { - /* Vocallic RR is assigned in ISCII Telugu and Unicode */ - if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { - targetByteUnit=missingCharMarker; - } - } - - if (deltaChanged) { - /* we are in a script block which is different than - * previous sourceChar's script block write ATR and language codes - */ - uint32_t temp=0; - temp =(uint16_t)(ATR<<8); - temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); - /* reset */ - deltaChanged=FALSE; - /* now append ATR and language code */ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); - if (U_FAILURE(*err)) { - break; - } - } - - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { - continue; - } - } - /* reset context char */ - converterData->contextCharFromUnicode = 0x00; - break; - } - if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { - /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ - /* reset context char */ - converterData->contextCharFromUnicode = 0x0000; - targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; - /* write targetByteUnit to target */ - WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else if (targetByteUnit != missingCharMarker) { - if (targetByteUnit==ISCII_HALANT) { - converterData->contextCharFromUnicode = (UChar)targetByteUnit; - } - /* write targetByteUnit to target*/ - WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); - if (U_FAILURE(*err)) { - break; - } - } else { - /* oops.. the code point is unassigned */ - /*check if the char is a First surrogate*/ - if (U16_IS_SURROGATE(sourceChar)) { - if (U16_IS_SURROGATE_LEAD(sourceChar)) { -getTrail: - /*look ahead to find the trail surrogate*/ - if (source < sourceLimit) { - /* test the following code unit */ - UChar trail= (*source); - if (U16_IS_TRAIL(trail)) { - source++; - sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); - *err =U_INVALID_CHAR_FOUND; - /* convert this surrogate code point */ - /* exit this condition tree */ - } else { - /* this is an unmatched lead code unit (1st surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* no more input */ - *err = U_ZERO_ERROR; - } - } else { - /* this is an unmatched trail code unit (2nd surrogate) */ - /* callback(illegal) */ - *err=U_ILLEGAL_CHAR_FOUND; - } - } else { - /* callback(unassigned) for a BMP code point */ - *err = U_INVALID_CHAR_FOUND; - } - - args->converter->fromUChar32=sourceChar; - break; - } - }/* end while(mySourceIndex<mySourceLength) */ - - /*save the state and return */ - args->source = source; - args->target = (char*)target; -} - -static const uint16_t lookupTable[][2]={ - { ZERO, ZERO }, /*DEFALT*/ - { ZERO, ZERO }, /*ROMAN*/ - { DEVANAGARI, DEV_MASK }, - { BENGALI, BNG_MASK }, - { TAMIL, TML_MASK }, - { TELUGU, KND_MASK }, - { BENGALI, BNG_MASK }, - { ORIYA, ORI_MASK }, - { KANNADA, KND_MASK }, - { MALAYALAM, MLM_MASK }, - { GUJARATI, GJR_MASK }, - { GURMUKHI, PNJ_MASK } -}; - -#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \ - /* add offset to current Indic Block */ \ - if(targetUniChar>ASCII_END && \ - targetUniChar != ZWJ && \ - targetUniChar != ZWNJ && \ - targetUniChar != DANDA && \ - targetUniChar != DOUBLE_DANDA){ \ - \ - targetUniChar+=(uint16_t)(delta); \ - } \ - /* now write the targetUniChar */ \ - if(target<args->targetLimit){ \ - *(target)++ = (UChar)targetUniChar; \ - if(offsets){ \ - *(offsets)++ = (int32_t)(offset); \ - } \ - }else{ \ - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ - (UChar)targetUniChar; \ - *err = U_BUFFER_OVERFLOW_ERROR; \ - } \ -} UPRV_BLOCK_MACRO_END - -#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \ - targetUniChar = toUnicodeTable[(sourceChar)] ; \ - /* is the code point valid in current script? */ \ - if(sourceChar> ASCII_END && \ - (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ - /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ - if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ - targetUniChar!=VOCALLIC_RR){ \ - targetUniChar=missingCharMarker; \ - } \ - } \ -} UPRV_BLOCK_MACRO_END - -/*********** - * Rules for ISCII to Unicode converter - * ISCII is stateful encoding. To convert ISCII bytes to Unicode, - * which has both precomposed and decomposed forms characters - * pre-context and post-context need to be considered. - * - * Post context - * i) ATR : Attribute code is used to declare the font and script switching. - * Currently we only switch scripts and font codes consumed without generating an error - * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, - * obsolete characters - * Pre context - * i) Halant: if preceeded by a halant then it is a explicit halant - * ii) Nukta : - * a) if preceeded by a halant then it is a soft halant - * b) if preceeded by specific consonants and the ligatures have pre-composed - * characters in Unicode then convert to pre-composed characters - * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda - * - */ - -static void U_CALLCONV -UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { - const char *source = ( char *) args->source; - UChar *target = args->target; - const char *sourceLimit = args->sourceLimit; - const UChar* targetLimit = args->targetLimit; - uint32_t targetUniChar = 0x0000; - uint8_t sourceChar = 0x0000; - UConverterDataISCII* data; - UChar32* toUnicodeStatus=NULL; - UChar32 tempTargetUniChar = 0x0000; - UChar* contextCharToUnicode= NULL; - UBool found; - int i; - int offset = 0; - - if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { - *err = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - - data = (UConverterDataISCII*)(args->converter->extraInfo); - contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ - toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ - - while (U_SUCCESS(*err) && source<sourceLimit) { - - targetUniChar = missingCharMarker; - - if (target < targetLimit) { - sourceChar = (unsigned char)*(source)++; - - /* look at the post-context preform special processing */ - if (*contextCharToUnicode==ATR) { - - /* If we have ATR in *contextCharToUnicode then we need to change our - * state to the Indic Script specified by sourceChar - */ - - /* check if the sourceChar is supported script range*/ - if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { - data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); - data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; - } else if (sourceChar==DEF) { - /* switch back to default */ - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - } else { - if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { - /* these are display codes consume and continue */ - } else { - *err =U_ILLEGAL_CHAR_FOUND; - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - goto CALLBACK; - } - } - - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - - continue; - - } else if (*contextCharToUnicode==EXT) { - /* check if sourceChar is in 0xA1-0xEE range */ - if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { - /* We currently support only Anudatta and Devanagari abbreviation sign */ - if (sourceChar==0xBF || sourceChar == 0xB8) { - targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; - - /* find out if the mapping is valid in this state */ - if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { - *contextCharToUnicode= NO_CHAR_MARKER; - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - - continue; - } - } - /* byte unit is unassigned */ - targetUniChar = missingCharMarker; - *err= U_INVALID_CHAR_FOUND; - } else { - /* only 0xA1 - 0xEE are legal after EXT char */ - *contextCharToUnicode= NO_CHAR_MARKER; - *err = U_ILLEGAL_CHAR_FOUND; - } - goto CALLBACK; - } else if (*contextCharToUnicode==ISCII_INV) { - if (sourceChar==ISCII_HALANT) { - targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ - } else { - targetUniChar = ZWJ; - } - - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* write to target */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - /* reset */ - *contextCharToUnicode=NO_CHAR_MARKER; - } - - /* look at the pre-context and perform special processing */ - switch (sourceChar) { - case ISCII_INV: - case EXT: - case ATR: - *contextCharToUnicode = (UChar)sourceChar; - - if (*toUnicodeStatus != missingCharMarker) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - continue; - case ISCII_DANDA: - /* handle double danda*/ - if (*contextCharToUnicode== ISCII_DANDA) { - targetUniChar = DOUBLE_DANDA; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case ISCII_HALANT: - /* handle explicit halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWNJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - } - break; - case 0x0A: - case 0x0D: - data->resetToDefaultToUnicode = TRUE; - GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - - case ISCII_VOWEL_SIGN_E: - i=1; - found=FALSE; - for (; i<vowelSignESpecialCases[0][0]; i++) { - U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases)); - if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) { - targetUniChar=vowelSignESpecialCases[i][1]; - found=TRUE; - break; - } - } - if (found) { - /* find out if the mapping is valid in this state */ - if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - break; - } - } - GET_MAPPING(sourceChar,targetUniChar,data); - *contextCharToUnicode = sourceChar; - break; - - case ISCII_NUKTA: - /* handle soft halant */ - if (*contextCharToUnicode == ISCII_HALANT) { - targetUniChar = ZWJ; - /* clear the context */ - *contextCharToUnicode = NO_CHAR_MARKER; - break; - } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. - * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). - */ - targetUniChar = PNJ_RRA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_SIGN_VIRAMA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - if (U_SUCCESS(*err)) { - targetUniChar = PNJ_HA; - WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - } else { - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; - args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; - } - *toUnicodeStatus = missingCharMarker; - data->contextCharToUnicode = NO_CHAR_MARKER; - continue; - } else { - /* try to handle <CHAR> + ISCII_NUKTA special mappings */ - i=1; - found =FALSE; - for (; i<nuktaSpecialCases[0][0]; i++) { - if (nuktaSpecialCases[i][0]==(uint8_t) - *contextCharToUnicode) { - targetUniChar=nuktaSpecialCases[i][1]; - found =TRUE; - break; - } - } - if (found) { - /* find out if the mapping is valid in this state */ - if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { - /*targetUniChar += data->currentDeltaToUnicode ;*/ - *contextCharToUnicode= NO_CHAR_MARKER; - *toUnicodeStatus = missingCharMarker; - if (data->currentDeltaToUnicode == PNJ_DELTA) { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); - continue; - } - break; - } - /* else fall through to default */ - } - /* else fall through to default */ - U_FALLTHROUGH; - } - default:GET_MAPPING(sourceChar,targetUniChar,data) - ; - *contextCharToUnicode = sourceChar; - break; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ - if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && - (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) { - /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ - offset = (int)(source-args->source - 3); - tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ - *toUnicodeStatus = missingCharMarker; - continue; - } else { - /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ - if (data->prevToUnicodeStatus) { - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); - data->prevToUnicodeStatus = 0x0000; - } - /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. - * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. - */ - if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { - targetUniChar = PNJ_TIPPI - PNJ_DELTA; - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); - } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { - /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ - data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; - } else { - /* write the previously mapped codepoint */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); - } - } - *toUnicodeStatus = missingCharMarker; - } - - if (targetUniChar != missingCharMarker) { - /* now save the targetUniChar for delayed write */ - *toUnicodeStatus = (UChar) targetUniChar; - if (data->resetToDefaultToUnicode==TRUE) { - data->currentDeltaToUnicode = data->defDeltaToUnicode; - data->currentMaskToUnicode = data->defMaskToUnicode; - data->resetToDefaultToUnicode=FALSE; - } - } else { - - /* we reach here only if targetUniChar == missingCharMarker - * so assign codes to reason and err - */ - *err = U_INVALID_CHAR_FOUND; -CALLBACK: - args->converter->toUBytes[0] = (uint8_t) sourceChar; - args->converter->toULength = 1; - break; - } - - } else { - *err =U_BUFFER_OVERFLOW_ERROR; - break; - } - } - - if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { - /* end of the input stream */ - UConverter *cnv = args->converter; - - if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { - /* set toUBytes[] */ - cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; - cnv->toULength = 1; - - /* avoid looping on truncated sequences */ - *contextCharToUnicode = NO_CHAR_MARKER; - } else { - cnv->toULength = 0; - } - - if (*toUnicodeStatus != missingCharMarker) { - /* output a remaining target character */ - WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); - *toUnicodeStatus = missingCharMarker; - } - } - - args->target = target; - args->source = source; -} - -/* structure for SafeClone calculations */ -struct cloneISCIIStruct { - UConverter cnv; - UConverterDataISCII mydata; -}; - -static UConverter * U_CALLCONV -_ISCII_SafeClone(const UConverter *cnv, - void *stackBuffer, - int32_t *pBufferSize, - UErrorCode *status) -{ - struct cloneISCIIStruct * localClone; - int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); - - if (U_FAILURE(*status)) { - return 0; - } - - if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ - *pBufferSize = bufferSizeNeeded; - return 0; - } - - localClone = (struct cloneISCIIStruct *)stackBuffer; - /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ - - uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); - localClone->cnv.extraInfo = &localClone->mydata; - localClone->cnv.isExtraLocal = TRUE; - - return &localClone->cnv; -} - -static void U_CALLCONV -_ISCIIGetUnicodeSet(const UConverter *cnv, - const USetAdder *sa, - UConverterUnicodeSet which, - UErrorCode *pErrorCode) -{ - (void)cnv; - (void)which; - (void)pErrorCode; - int32_t idx, script; - uint8_t mask; - - /* Since all ISCII versions allow switching to other ISCII - scripts, we add all roundtrippable characters to this set. */ - sa->addRange(sa->set, 0, ASCII_END); - for (script = DEVANAGARI; script <= MALAYALAM; script++) { - mask = (uint8_t)(lookupInitialData[script].maskEnum); - for (idx = 0; idx < DELTA; idx++) { - /* added check for TELUGU character */ - if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { - sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); - } - } - } - sa->add(sa->set, DANDA); - sa->add(sa->set, DOUBLE_DANDA); - sa->add(sa->set, ZWNJ); - sa->add(sa->set, ZWJ); -} -U_CDECL_END -static const UConverterImpl _ISCIIImpl={ - - UCNV_ISCII, - - NULL, - NULL, - - _ISCIIOpen, - _ISCIIClose, - _ISCIIReset, - - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_toUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, - NULL, - - NULL, - _ISCIIgetName, - NULL, - _ISCII_SafeClone, - _ISCIIGetUnicodeSet, - NULL, - NULL -}; - -static const UConverterStaticData _ISCIIStaticData={ - sizeof(UConverterStaticData), - "ISCII", - 0, - UCNV_IBM, - UCNV_ISCII, - 1, - 4, - { 0x1a, 0, 0, 0 }, - 0x1, - FALSE, - FALSE, - 0x0, - 0x0, - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ - -}; - -const UConverterSharedData _ISCIIData= - UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); - -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (C) 2000-2016, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: ucnvisci.c +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001JUN26 +* created by: Ram Viswanadha +* +* Date Name Description +* 24/7/2001 Ram Added support for EXT character handling +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + +#include "unicode/ucnv.h" +#include "unicode/ucnv_cb.h" +#include "unicode/utf16.h" +#include "cmemory.h" +#include "ucnv_bld.h" +#include "ucnv_cnv.h" +#include "cstring.h" +#include "uassert.h" + +#define UCNV_OPTIONS_VERSION_MASK 0xf +#define NUKTA 0x093c +#define HALANT 0x094d +#define ZWNJ 0x200c /* Zero Width Non Joiner */ +#define ZWJ 0x200d /* Zero width Joiner */ +#define INVALID_CHAR 0xffff +#define ATR 0xEF /* Attribute code */ +#define EXT 0xF0 /* Extension code */ +#define DANDA 0x0964 +#define DOUBLE_DANDA 0x0965 +#define ISCII_NUKTA 0xE9 +#define ISCII_HALANT 0xE8 +#define ISCII_DANDA 0xEA +#define ISCII_INV 0xD9 +#define ISCII_VOWEL_SIGN_E 0xE0 +#define INDIC_BLOCK_BEGIN 0x0900 +#define INDIC_BLOCK_END 0x0D7F +#define INDIC_RANGE (INDIC_BLOCK_END - INDIC_BLOCK_BEGIN) +#define VOCALLIC_RR 0x0931 +#define LF 0x0A +#define ASCII_END 0xA0 +#define NO_CHAR_MARKER 0xFFFE +#define TELUGU_DELTA DELTA * TELUGU +#define DEV_ABBR_SIGN 0x0970 +#define DEV_ANUDATTA 0x0952 +#define EXT_RANGE_BEGIN 0xA1 +#define EXT_RANGE_END 0xEE + +#define PNJ_DELTA 0x0100 +#define PNJ_BINDI 0x0A02 +#define PNJ_TIPPI 0x0A70 +#define PNJ_SIGN_VIRAMA 0x0A4D +#define PNJ_ADHAK 0x0A71 +#define PNJ_HA 0x0A39 +#define PNJ_RRA 0x0A5C + +typedef enum { + DEVANAGARI =0, + BENGALI, + GURMUKHI, + GUJARATI, + ORIYA, + TAMIL, + TELUGU, + KANNADA, + MALAYALAM, + DELTA=0x80 +}UniLang; + +/** + * Enumeration for switching code pages if <ATR>+<one of below values> + * is encountered + */ +typedef enum { + DEF = 0x40, + RMN = 0x41, + DEV = 0x42, + BNG = 0x43, + TML = 0x44, + TLG = 0x45, + ASM = 0x46, + ORI = 0x47, + KND = 0x48, + MLM = 0x49, + GJR = 0x4A, + PNJ = 0x4B, + ARB = 0x71, + PES = 0x72, + URD = 0x73, + SND = 0x74, + KSM = 0x75, + PST = 0x76 +}ISCIILang; + +typedef enum { + DEV_MASK =0x80, + PNJ_MASK =0x40, + GJR_MASK =0x20, + ORI_MASK =0x10, + BNG_MASK =0x08, + KND_MASK =0x04, + MLM_MASK =0x02, + TML_MASK =0x01, + ZERO =0x00 +}MaskEnum; + +#define ISCII_CNV_PREFIX "ISCII,version=" + +typedef struct { + UChar contextCharToUnicode; /* previous Unicode codepoint for contextual analysis */ + UChar contextCharFromUnicode; /* previous Unicode codepoint for contextual analysis */ + uint16_t defDeltaToUnicode; /* delta for switching to default state when DEF is encountered */ + uint16_t currentDeltaFromUnicode; /* current delta in Indic block */ + uint16_t currentDeltaToUnicode; /* current delta in Indic block */ + MaskEnum currentMaskFromUnicode; /* mask for current state in toUnicode */ + MaskEnum currentMaskToUnicode; /* mask for current state in toUnicode */ + MaskEnum defMaskToUnicode; /* mask for default state in toUnicode */ + UBool isFirstBuffer; /* boolean for fromUnicode to see if we need to announce the first script */ + UBool resetToDefaultToUnicode; /* boolean for reseting to default delta and mask when a newline is encountered*/ + char name[sizeof(ISCII_CNV_PREFIX) + 1]; + UChar32 prevToUnicodeStatus; /* Hold the previous toUnicodeStatus. This is necessary because we may need to know the last two code points. */ +} UConverterDataISCII; + +typedef struct LookupDataStruct { + UniLang uniLang; + MaskEnum maskEnum; + ISCIILang isciiLang; +} LookupDataStruct; + +static const LookupDataStruct lookupInitialData[]={ + { DEVANAGARI, DEV_MASK, DEV }, + { BENGALI, BNG_MASK, BNG }, + { GURMUKHI, PNJ_MASK, PNJ }, + { GUJARATI, GJR_MASK, GJR }, + { ORIYA, ORI_MASK, ORI }, + { TAMIL, TML_MASK, TML }, + { TELUGU, KND_MASK, TLG }, + { KANNADA, KND_MASK, KND }, + { MALAYALAM, MLM_MASK, MLM } +}; + +/* + * For special handling of certain Gurmukhi characters. + * Bit 0 (value 1): PNJ consonant + * Bit 1 (value 2): PNJ Bindi Tippi + */ +static const uint8_t pnjMap[80] = { + /* 0A00..0A0F */ + 0, 0, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0A10..0A1F */ + 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + /* 0A20..0A2F */ + 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, + /* 0A30..0A3F */ + 3, 0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 2, + /* 0A40..0A4F */ + 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +static UBool +isPNJConsonant(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] & 1); + } +} + +static UBool +isPNJBindiTippi(UChar32 c) { + if (c < 0xa00 || 0xa50 <= c) { + return FALSE; + } else { + return (UBool)(pnjMap[c - 0xa00] >> 1); + } +} +U_CDECL_BEGIN +static void U_CALLCONV +_ISCIIOpen(UConverter *cnv, UConverterLoadArgs *pArgs, UErrorCode *errorCode) { + if(pArgs->onlyTestIsLoadable) { + return; + } + + cnv->extraInfo = uprv_malloc(sizeof(UConverterDataISCII)); + + if (cnv->extraInfo != NULL) { + int32_t len=0; + UConverterDataISCII *converterData= + (UConverterDataISCII *) cnv->extraInfo; + converterData->contextCharToUnicode=NO_CHAR_MARKER; + cnv->toUnicodeStatus = missingCharMarker; + converterData->contextCharFromUnicode=0x0000; + converterData->resetToDefaultToUnicode=FALSE; + /* check if the version requested is supported */ + if ((pArgs->options & UCNV_OPTIONS_VERSION_MASK) < 9) { + /* initialize state variables */ + converterData->currentDeltaFromUnicode + = converterData->currentDeltaToUnicode + = converterData->defDeltaToUnicode = (uint16_t)(lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].uniLang * DELTA); + + converterData->currentMaskFromUnicode + = converterData->currentMaskToUnicode + = converterData->defMaskToUnicode = lookupInitialData[pArgs->options & UCNV_OPTIONS_VERSION_MASK].maskEnum; + + converterData->isFirstBuffer=TRUE; + (void)uprv_strcpy(converterData->name, ISCII_CNV_PREFIX); + len = (int32_t)uprv_strlen(converterData->name); + converterData->name[len]= (char)((pArgs->options & UCNV_OPTIONS_VERSION_MASK) + '0'); + converterData->name[len+1]=0; + + converterData->prevToUnicodeStatus = 0x0000; + } else { + uprv_free(cnv->extraInfo); + cnv->extraInfo = NULL; + *errorCode = U_ILLEGAL_ARGUMENT_ERROR; + } + + } else { + *errorCode =U_MEMORY_ALLOCATION_ERROR; + } +} + +static void U_CALLCONV +_ISCIIClose(UConverter *cnv) { + if (cnv->extraInfo!=NULL) { + if (!cnv->isExtraLocal) { + uprv_free(cnv->extraInfo); + } + cnv->extraInfo=NULL; + } +} + +static const char* U_CALLCONV +_ISCIIgetName(const UConverter* cnv) { + if (cnv->extraInfo) { + UConverterDataISCII* myData= (UConverterDataISCII*)cnv->extraInfo; + return myData->name; + } + return NULL; +} + +static void U_CALLCONV +_ISCIIReset(UConverter *cnv, UConverterResetChoice choice) { + UConverterDataISCII* data =(UConverterDataISCII *) (cnv->extraInfo); + if (choice<=UCNV_RESET_TO_UNICODE) { + cnv->toUnicodeStatus = missingCharMarker; + cnv->mode=0; + data->currentDeltaToUnicode=data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->contextCharToUnicode=NO_CHAR_MARKER; + data->prevToUnicodeStatus = 0x0000; + } + if (choice!=UCNV_RESET_TO_UNICODE) { + cnv->fromUChar32=0x0000; + data->contextCharFromUnicode=0x00; + data->currentMaskFromUnicode=data->defMaskToUnicode; + data->currentDeltaFromUnicode=data->defDeltaToUnicode; + data->isFirstBuffer=TRUE; + data->resetToDefaultToUnicode=FALSE; + } +} + +/** + * The values in validity table are indexed by the lower bits of Unicode + * range 0x0900 - 0x09ff. The values have a structure like: + * --------------------------------------------------------------- + * | DEV | PNJ | GJR | ORI | BNG | TLG | MLM | TML | + * | | | | | ASM | KND | | | + * --------------------------------------------------------------- + * If a code point is valid in a particular script + * then that bit is turned on + * + * Unicode does not distinguish between Bengali and Assamese so we use 1 bit for + * to represent these languages + * + * Telugu and Kannada have same codepoints except for Vocallic_RR which we special case + * and combine and use 1 bit to represent these languages. + * + * TODO: It is probably easier to understand and maintain to change this + * to use uint16_t and give each of the 9 Unicode/script blocks its own bit. + */ + +static const uint8_t validityTable[128] = { +/* This state table is tool generated please do not edit unless you know exactly what you are doing */ +/* Note: This table was edited to mirror the Windows XP implementation */ +/*ISCII:Valid:Unicode */ +/*0xa0 : 0x00: 0x900 */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa1 : 0xb8: 0x901 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0xa2 : 0xfe: 0x902 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa3 : 0xbf: 0x903 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x904 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xa4 : 0xff: 0x905 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa5 : 0xff: 0x906 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa6 : 0xff: 0x907 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa7 : 0xff: 0x908 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa8 : 0xff: 0x909 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xa9 : 0xff: 0x90a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xaa : 0xfe: 0x90b */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x90c */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xae : 0x80: 0x90d */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xab : 0x87: 0x90e */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xac : 0xff: 0x90f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xad : 0xff: 0x910 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb2 : 0x80: 0x911 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xaf : 0x87: 0x912 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xb0 : 0xff: 0x913 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb1 : 0xff: 0x914 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb3 : 0xff: 0x915 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb4 : 0xfe: 0x916 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb5 : 0xfe: 0x917 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb6 : 0xfe: 0x918 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xb7 : 0xff: 0x919 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb8 : 0xff: 0x91a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xb9 : 0xfe: 0x91b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xba : 0xff: 0x91c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbb : 0xfe: 0x91d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbc : 0xff: 0x91e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbd : 0xff: 0x91f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xbe : 0xfe: 0x920 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xbf : 0xfe: 0x921 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc0 : 0xfe: 0x922 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc1 : 0xff: 0x923 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc2 : 0xff: 0x924 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc3 : 0xfe: 0x925 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc4 : 0xfe: 0x926 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc5 : 0xfe: 0x927 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xc6 : 0xff: 0x928 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc7 : 0x81: 0x929 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + TML_MASK , +/*0xc8 : 0xff: 0x92a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xc9 : 0xfe: 0x92b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xca : 0xfe: 0x92c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xcb : 0xfe: 0x92d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xcc : 0xfe: 0x92e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcd : 0xff: 0x92f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xcf : 0xff: 0x930 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd0 : 0x87: 0x931 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd1 : 0xff: 0x932 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd2 : 0xb7: 0x933 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd3 : 0x83: 0x934 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + MLM_MASK + TML_MASK , +/*0xd4 : 0xff: 0x935 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xd5 : 0xfe: 0x936 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0xd6 : 0xbf: 0x937 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd7 : 0xff: 0x938 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xd8 : 0xff: 0x939 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x00: 0x93A */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93B */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe9 : 0xda: 0x93c */ DEV_MASK + PNJ_MASK + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x93d */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xda : 0xff: 0x93e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdb : 0xff: 0x93f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdc : 0xff: 0x940 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdd : 0xff: 0x941 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xde : 0xff: 0x942 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xdf : 0xbe: 0x943 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x944 */ DEV_MASK + ZERO + GJR_MASK + ZERO + BNG_MASK + KND_MASK + ZERO + ZERO , +/*0xe3 : 0x80: 0x945 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe0 : 0x87: 0x946 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe1 : 0xff: 0x947 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe2 : 0xff: 0x948 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe7 : 0x80: 0x949 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xe4 : 0x87: 0x94a */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + KND_MASK + MLM_MASK + TML_MASK , +/*0xe5 : 0xff: 0x94b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe6 : 0xff: 0x94c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xe8 : 0xff: 0x94d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xec : 0x00: 0x94e */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xed : 0x00: 0x94f */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x950 */ DEV_MASK + ZERO + GJR_MASK + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x951 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x952 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x953 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x954 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x955 */ ZERO + ZERO + ZERO + ZERO + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x956 */ ZERO + ZERO + ZERO + ORI_MASK + ZERO + KND_MASK + ZERO + ZERO , +/*0x00 : 0x00: 0x957 */ ZERO + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x958 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x959 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95a */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95b */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95c */ DEV_MASK + PNJ_MASK + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95d */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x95e */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xce : 0x98: 0x95f */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x960 */ DEV_MASK + ZERO + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x961 */ DEV_MASK + ZERO + ZERO + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + ZERO , +/*0x00 : 0x00: 0x962 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0x00 : 0x00: 0x963 */ DEV_MASK + ZERO + ZERO + ZERO + BNG_MASK + ZERO + ZERO + ZERO , +/*0xea : 0xf8: 0x964 */ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xeaea : 0x00: 0x965*/ DEV_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/*0xf1 : 0xff: 0x966 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf2 : 0xff: 0x967 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf3 : 0xff: 0x968 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf4 : 0xff: 0x969 */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf5 : 0xff: 0x96a */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf6 : 0xff: 0x96b */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf7 : 0xff: 0x96c */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf8 : 0xff: 0x96d */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xf9 : 0xff: 0x96e */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0xfa : 0xff: 0x96f */ DEV_MASK + PNJ_MASK + GJR_MASK + ORI_MASK + BNG_MASK + KND_MASK + MLM_MASK + TML_MASK , +/*0x00 : 0x80: 0x970 */ DEV_MASK + PNJ_MASK + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO , +/* + * The length of the array is 128 to provide values for 0x900..0x97f. + * The last 15 entries for 0x971..0x97f of the validity table are all zero + * because no Indic script uses such Unicode code points. + */ +/*0x00 : 0x00: 0x9yz */ ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO + ZERO +}; + +static const uint16_t fromUnicodeTable[128]={ + 0x00a0 ,/* 0x0900 */ + 0x00a1 ,/* 0x0901 */ + 0x00a2 ,/* 0x0902 */ + 0x00a3 ,/* 0x0903 */ + 0xa4e0 ,/* 0x0904 */ + 0x00a4 ,/* 0x0905 */ + 0x00a5 ,/* 0x0906 */ + 0x00a6 ,/* 0x0907 */ + 0x00a7 ,/* 0x0908 */ + 0x00a8 ,/* 0x0909 */ + 0x00a9 ,/* 0x090a */ + 0x00aa ,/* 0x090b */ + 0xA6E9 ,/* 0x090c */ + 0x00ae ,/* 0x090d */ + 0x00ab ,/* 0x090e */ + 0x00ac ,/* 0x090f */ + 0x00ad ,/* 0x0910 */ + 0x00b2 ,/* 0x0911 */ + 0x00af ,/* 0x0912 */ + 0x00b0 ,/* 0x0913 */ + 0x00b1 ,/* 0x0914 */ + 0x00b3 ,/* 0x0915 */ + 0x00b4 ,/* 0x0916 */ + 0x00b5 ,/* 0x0917 */ + 0x00b6 ,/* 0x0918 */ + 0x00b7 ,/* 0x0919 */ + 0x00b8 ,/* 0x091a */ + 0x00b9 ,/* 0x091b */ + 0x00ba ,/* 0x091c */ + 0x00bb ,/* 0x091d */ + 0x00bc ,/* 0x091e */ + 0x00bd ,/* 0x091f */ + 0x00be ,/* 0x0920 */ + 0x00bf ,/* 0x0921 */ + 0x00c0 ,/* 0x0922 */ + 0x00c1 ,/* 0x0923 */ + 0x00c2 ,/* 0x0924 */ + 0x00c3 ,/* 0x0925 */ + 0x00c4 ,/* 0x0926 */ + 0x00c5 ,/* 0x0927 */ + 0x00c6 ,/* 0x0928 */ + 0x00c7 ,/* 0x0929 */ + 0x00c8 ,/* 0x092a */ + 0x00c9 ,/* 0x092b */ + 0x00ca ,/* 0x092c */ + 0x00cb ,/* 0x092d */ + 0x00cc ,/* 0x092e */ + 0x00cd ,/* 0x092f */ + 0x00cf ,/* 0x0930 */ + 0x00d0 ,/* 0x0931 */ + 0x00d1 ,/* 0x0932 */ + 0x00d2 ,/* 0x0933 */ + 0x00d3 ,/* 0x0934 */ + 0x00d4 ,/* 0x0935 */ + 0x00d5 ,/* 0x0936 */ + 0x00d6 ,/* 0x0937 */ + 0x00d7 ,/* 0x0938 */ + 0x00d8 ,/* 0x0939 */ + 0xFFFF ,/* 0x093A */ + 0xFFFF ,/* 0x093B */ + 0x00e9 ,/* 0x093c */ + 0xEAE9 ,/* 0x093d */ + 0x00da ,/* 0x093e */ + 0x00db ,/* 0x093f */ + 0x00dc ,/* 0x0940 */ + 0x00dd ,/* 0x0941 */ + 0x00de ,/* 0x0942 */ + 0x00df ,/* 0x0943 */ + 0xDFE9 ,/* 0x0944 */ + 0x00e3 ,/* 0x0945 */ + 0x00e0 ,/* 0x0946 */ + 0x00e1 ,/* 0x0947 */ + 0x00e2 ,/* 0x0948 */ + 0x00e7 ,/* 0x0949 */ + 0x00e4 ,/* 0x094a */ + 0x00e5 ,/* 0x094b */ + 0x00e6 ,/* 0x094c */ + 0x00e8 ,/* 0x094d */ + 0x00ec ,/* 0x094e */ + 0x00ed ,/* 0x094f */ + 0xA1E9 ,/* 0x0950 */ /* OM Symbol */ + 0xFFFF ,/* 0x0951 */ + 0xF0B8 ,/* 0x0952 */ + 0xFFFF ,/* 0x0953 */ + 0xFFFF ,/* 0x0954 */ + 0xFFFF ,/* 0x0955 */ + 0xFFFF ,/* 0x0956 */ + 0xFFFF ,/* 0x0957 */ + 0xb3e9 ,/* 0x0958 */ + 0xb4e9 ,/* 0x0959 */ + 0xb5e9 ,/* 0x095a */ + 0xbae9 ,/* 0x095b */ + 0xbfe9 ,/* 0x095c */ + 0xC0E9 ,/* 0x095d */ + 0xc9e9 ,/* 0x095e */ + 0x00ce ,/* 0x095f */ + 0xAAe9 ,/* 0x0960 */ + 0xA7E9 ,/* 0x0961 */ + 0xDBE9 ,/* 0x0962 */ + 0xDCE9 ,/* 0x0963 */ + 0x00ea ,/* 0x0964 */ + 0xeaea ,/* 0x0965 */ + 0x00f1 ,/* 0x0966 */ + 0x00f2 ,/* 0x0967 */ + 0x00f3 ,/* 0x0968 */ + 0x00f4 ,/* 0x0969 */ + 0x00f5 ,/* 0x096a */ + 0x00f6 ,/* 0x096b */ + 0x00f7 ,/* 0x096c */ + 0x00f8 ,/* 0x096d */ + 0x00f9 ,/* 0x096e */ + 0x00fa ,/* 0x096f */ + 0xF0BF ,/* 0x0970 */ + 0xFFFF ,/* 0x0971 */ + 0xFFFF ,/* 0x0972 */ + 0xFFFF ,/* 0x0973 */ + 0xFFFF ,/* 0x0974 */ + 0xFFFF ,/* 0x0975 */ + 0xFFFF ,/* 0x0976 */ + 0xFFFF ,/* 0x0977 */ + 0xFFFF ,/* 0x0978 */ + 0xFFFF ,/* 0x0979 */ + 0xFFFF ,/* 0x097a */ + 0xFFFF ,/* 0x097b */ + 0xFFFF ,/* 0x097c */ + 0xFFFF ,/* 0x097d */ + 0xFFFF ,/* 0x097e */ + 0xFFFF ,/* 0x097f */ +}; +static const uint16_t toUnicodeTable[256]={ + 0x0000,/* 0x00 */ + 0x0001,/* 0x01 */ + 0x0002,/* 0x02 */ + 0x0003,/* 0x03 */ + 0x0004,/* 0x04 */ + 0x0005,/* 0x05 */ + 0x0006,/* 0x06 */ + 0x0007,/* 0x07 */ + 0x0008,/* 0x08 */ + 0x0009,/* 0x09 */ + 0x000a,/* 0x0a */ + 0x000b,/* 0x0b */ + 0x000c,/* 0x0c */ + 0x000d,/* 0x0d */ + 0x000e,/* 0x0e */ + 0x000f,/* 0x0f */ + 0x0010,/* 0x10 */ + 0x0011,/* 0x11 */ + 0x0012,/* 0x12 */ + 0x0013,/* 0x13 */ + 0x0014,/* 0x14 */ + 0x0015,/* 0x15 */ + 0x0016,/* 0x16 */ + 0x0017,/* 0x17 */ + 0x0018,/* 0x18 */ + 0x0019,/* 0x19 */ + 0x001a,/* 0x1a */ + 0x001b,/* 0x1b */ + 0x001c,/* 0x1c */ + 0x001d,/* 0x1d */ + 0x001e,/* 0x1e */ + 0x001f,/* 0x1f */ + 0x0020,/* 0x20 */ + 0x0021,/* 0x21 */ + 0x0022,/* 0x22 */ + 0x0023,/* 0x23 */ + 0x0024,/* 0x24 */ + 0x0025,/* 0x25 */ + 0x0026,/* 0x26 */ + 0x0027,/* 0x27 */ + 0x0028,/* 0x28 */ + 0x0029,/* 0x29 */ + 0x002a,/* 0x2a */ + 0x002b,/* 0x2b */ + 0x002c,/* 0x2c */ + 0x002d,/* 0x2d */ + 0x002e,/* 0x2e */ + 0x002f,/* 0x2f */ + 0x0030,/* 0x30 */ + 0x0031,/* 0x31 */ + 0x0032,/* 0x32 */ + 0x0033,/* 0x33 */ + 0x0034,/* 0x34 */ + 0x0035,/* 0x35 */ + 0x0036,/* 0x36 */ + 0x0037,/* 0x37 */ + 0x0038,/* 0x38 */ + 0x0039,/* 0x39 */ + 0x003A,/* 0x3A */ + 0x003B,/* 0x3B */ + 0x003c,/* 0x3c */ + 0x003d,/* 0x3d */ + 0x003e,/* 0x3e */ + 0x003f,/* 0x3f */ + 0x0040,/* 0x40 */ + 0x0041,/* 0x41 */ + 0x0042,/* 0x42 */ + 0x0043,/* 0x43 */ + 0x0044,/* 0x44 */ + 0x0045,/* 0x45 */ + 0x0046,/* 0x46 */ + 0x0047,/* 0x47 */ + 0x0048,/* 0x48 */ + 0x0049,/* 0x49 */ + 0x004a,/* 0x4a */ + 0x004b,/* 0x4b */ + 0x004c,/* 0x4c */ + 0x004d,/* 0x4d */ + 0x004e,/* 0x4e */ + 0x004f,/* 0x4f */ + 0x0050,/* 0x50 */ + 0x0051,/* 0x51 */ + 0x0052,/* 0x52 */ + 0x0053,/* 0x53 */ + 0x0054,/* 0x54 */ + 0x0055,/* 0x55 */ + 0x0056,/* 0x56 */ + 0x0057,/* 0x57 */ + 0x0058,/* 0x58 */ + 0x0059,/* 0x59 */ + 0x005a,/* 0x5a */ + 0x005b,/* 0x5b */ + 0x005c,/* 0x5c */ + 0x005d,/* 0x5d */ + 0x005e,/* 0x5e */ + 0x005f,/* 0x5f */ + 0x0060,/* 0x60 */ + 0x0061,/* 0x61 */ + 0x0062,/* 0x62 */ + 0x0063,/* 0x63 */ + 0x0064,/* 0x64 */ + 0x0065,/* 0x65 */ + 0x0066,/* 0x66 */ + 0x0067,/* 0x67 */ + 0x0068,/* 0x68 */ + 0x0069,/* 0x69 */ + 0x006a,/* 0x6a */ + 0x006b,/* 0x6b */ + 0x006c,/* 0x6c */ + 0x006d,/* 0x6d */ + 0x006e,/* 0x6e */ + 0x006f,/* 0x6f */ + 0x0070,/* 0x70 */ + 0x0071,/* 0x71 */ + 0x0072,/* 0x72 */ + 0x0073,/* 0x73 */ + 0x0074,/* 0x74 */ + 0x0075,/* 0x75 */ + 0x0076,/* 0x76 */ + 0x0077,/* 0x77 */ + 0x0078,/* 0x78 */ + 0x0079,/* 0x79 */ + 0x007a,/* 0x7a */ + 0x007b,/* 0x7b */ + 0x007c,/* 0x7c */ + 0x007d,/* 0x7d */ + 0x007e,/* 0x7e */ + 0x007f,/* 0x7f */ + 0x0080,/* 0x80 */ + 0x0081,/* 0x81 */ + 0x0082,/* 0x82 */ + 0x0083,/* 0x83 */ + 0x0084,/* 0x84 */ + 0x0085,/* 0x85 */ + 0x0086,/* 0x86 */ + 0x0087,/* 0x87 */ + 0x0088,/* 0x88 */ + 0x0089,/* 0x89 */ + 0x008a,/* 0x8a */ + 0x008b,/* 0x8b */ + 0x008c,/* 0x8c */ + 0x008d,/* 0x8d */ + 0x008e,/* 0x8e */ + 0x008f,/* 0x8f */ + 0x0090,/* 0x90 */ + 0x0091,/* 0x91 */ + 0x0092,/* 0x92 */ + 0x0093,/* 0x93 */ + 0x0094,/* 0x94 */ + 0x0095,/* 0x95 */ + 0x0096,/* 0x96 */ + 0x0097,/* 0x97 */ + 0x0098,/* 0x98 */ + 0x0099,/* 0x99 */ + 0x009a,/* 0x9a */ + 0x009b,/* 0x9b */ + 0x009c,/* 0x9c */ + 0x009d,/* 0x9d */ + 0x009e,/* 0x9e */ + 0x009f,/* 0x9f */ + 0x00A0,/* 0xa0 */ + 0x0901,/* 0xa1 */ + 0x0902,/* 0xa2 */ + 0x0903,/* 0xa3 */ + 0x0905,/* 0xa4 */ + 0x0906,/* 0xa5 */ + 0x0907,/* 0xa6 */ + 0x0908,/* 0xa7 */ + 0x0909,/* 0xa8 */ + 0x090a,/* 0xa9 */ + 0x090b,/* 0xaa */ + 0x090e,/* 0xab */ + 0x090f,/* 0xac */ + 0x0910,/* 0xad */ + 0x090d,/* 0xae */ + 0x0912,/* 0xaf */ + 0x0913,/* 0xb0 */ + 0x0914,/* 0xb1 */ + 0x0911,/* 0xb2 */ + 0x0915,/* 0xb3 */ + 0x0916,/* 0xb4 */ + 0x0917,/* 0xb5 */ + 0x0918,/* 0xb6 */ + 0x0919,/* 0xb7 */ + 0x091a,/* 0xb8 */ + 0x091b,/* 0xb9 */ + 0x091c,/* 0xba */ + 0x091d,/* 0xbb */ + 0x091e,/* 0xbc */ + 0x091f,/* 0xbd */ + 0x0920,/* 0xbe */ + 0x0921,/* 0xbf */ + 0x0922,/* 0xc0 */ + 0x0923,/* 0xc1 */ + 0x0924,/* 0xc2 */ + 0x0925,/* 0xc3 */ + 0x0926,/* 0xc4 */ + 0x0927,/* 0xc5 */ + 0x0928,/* 0xc6 */ + 0x0929,/* 0xc7 */ + 0x092a,/* 0xc8 */ + 0x092b,/* 0xc9 */ + 0x092c,/* 0xca */ + 0x092d,/* 0xcb */ + 0x092e,/* 0xcc */ + 0x092f,/* 0xcd */ + 0x095f,/* 0xce */ + 0x0930,/* 0xcf */ + 0x0931,/* 0xd0 */ + 0x0932,/* 0xd1 */ + 0x0933,/* 0xd2 */ + 0x0934,/* 0xd3 */ + 0x0935,/* 0xd4 */ + 0x0936,/* 0xd5 */ + 0x0937,/* 0xd6 */ + 0x0938,/* 0xd7 */ + 0x0939,/* 0xd8 */ + 0x200D,/* 0xd9 */ + 0x093e,/* 0xda */ + 0x093f,/* 0xdb */ + 0x0940,/* 0xdc */ + 0x0941,/* 0xdd */ + 0x0942,/* 0xde */ + 0x0943,/* 0xdf */ + 0x0946,/* 0xe0 */ + 0x0947,/* 0xe1 */ + 0x0948,/* 0xe2 */ + 0x0945,/* 0xe3 */ + 0x094a,/* 0xe4 */ + 0x094b,/* 0xe5 */ + 0x094c,/* 0xe6 */ + 0x0949,/* 0xe7 */ + 0x094d,/* 0xe8 */ + 0x093c,/* 0xe9 */ + 0x0964,/* 0xea */ + 0xFFFF,/* 0xeb */ + 0xFFFF,/* 0xec */ + 0xFFFF,/* 0xed */ + 0xFFFF,/* 0xee */ + 0xFFFF,/* 0xef */ + 0xFFFF,/* 0xf0 */ + 0x0966,/* 0xf1 */ + 0x0967,/* 0xf2 */ + 0x0968,/* 0xf3 */ + 0x0969,/* 0xf4 */ + 0x096a,/* 0xf5 */ + 0x096b,/* 0xf6 */ + 0x096c,/* 0xf7 */ + 0x096d,/* 0xf8 */ + 0x096e,/* 0xf9 */ + 0x096f,/* 0xfa */ + 0xFFFF,/* 0xfb */ + 0xFFFF,/* 0xfc */ + 0xFFFF,/* 0xfd */ + 0xFFFF,/* 0xfe */ + 0xFFFF /* 0xff */ +}; + +static const uint16_t vowelSignESpecialCases[][2]={ + { 2 /*length of array*/ , 0 }, + { 0xA4 , 0x0904 }, +}; + +static const uint16_t nuktaSpecialCases[][2]={ + { 16 /*length of array*/ , 0 }, + { 0xA6 , 0x090c }, + { 0xEA , 0x093D }, + { 0xDF , 0x0944 }, + { 0xA1 , 0x0950 }, + { 0xb3 , 0x0958 }, + { 0xb4 , 0x0959 }, + { 0xb5 , 0x095a }, + { 0xba , 0x095b }, + { 0xbf , 0x095c }, + { 0xC0 , 0x095d }, + { 0xc9 , 0x095e }, + { 0xAA , 0x0960 }, + { 0xA7 , 0x0961 }, + { 0xDB , 0x0962 }, + { 0xDC , 0x0963 }, +}; + + +#define WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err) UPRV_BLOCK_MACRO_BEGIN { \ + int32_t offset = (int32_t)(source - args->source-1); \ + /* write the targetUniChar to target */ \ + if(target < targetLimit){ \ + if(targetByteUnit <= 0xFF){ \ + *(target)++ = (uint8_t)(targetByteUnit); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + }else{ \ + if (targetByteUnit > 0xFFFF) { \ + *(target)++ = (uint8_t)(targetByteUnit>>16); \ + if (offsets) { \ + --offset; \ + *(offsets++) = offset; \ + } \ + } \ + if (!(target < targetLimit)) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)(targetByteUnit >> 8); \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t)targetByteUnit; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } else { \ + *(target)++ = (uint8_t)(targetByteUnit>>8); \ + if(offsets){ \ + *(offsets++) = offset; \ + } \ + if(target < targetLimit){ \ + *(target)++ = (uint8_t) targetByteUnit; \ + if(offsets){ \ + *(offsets++) = offset ; \ + } \ + }else{ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] =\ + (uint8_t) (targetByteUnit); \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ + } \ + } \ + }else{ \ + if (targetByteUnit & 0xFF0000) { \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>16); \ + } \ + if(targetByteUnit & 0xFF00){ \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit >>8); \ + } \ + args->converter->charErrorBuffer[args->converter->charErrorBufferLength++] = \ + (uint8_t) (targetByteUnit); \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} UPRV_BLOCK_MACRO_END + +/* Rules: + * Explicit Halant : + * <HALANT> + <ZWNJ> + * Soft Halant : + * <HALANT> + <ZWJ> + */ +static void U_CALLCONV +UConverter_fromUnicode_ISCII_OFFSETS_LOGIC( + UConverterFromUnicodeArgs * args, UErrorCode * err) { + const UChar *source = args->source; + const UChar *sourceLimit = args->sourceLimit; + unsigned char *target = (unsigned char *) args->target; + unsigned char *targetLimit = (unsigned char *) args->targetLimit; + int32_t* offsets = args->offsets; + uint32_t targetByteUnit = 0x0000; + UChar32 sourceChar = 0x0000; + UChar32 tempContextFromUnicode = 0x0000; /* For special handling of the Gurmukhi script. */ + UConverterDataISCII *converterData; + uint16_t newDelta=0; + uint16_t range = 0; + UBool deltaChanged = FALSE; + + if ((args->converter == NULL) || (args->targetLimit < args->target) || (args->sourceLimit < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + /* initialize data */ + converterData=(UConverterDataISCII*)args->converter->extraInfo; + newDelta=converterData->currentDeltaFromUnicode; + range = (uint16_t)(newDelta/DELTA); + + if ((sourceChar = args->converter->fromUChar32)!=0) { + goto getTrail; + } + + /*writing the char to the output stream */ + while (source < sourceLimit) { + /* Write the language code following LF only if LF is not the last character. */ + if (args->converter->fromUnicodeStatus == LF) { + targetByteUnit = ATR<<8; + targetByteUnit += (uint8_t) lookupInitialData[range].isciiLang; + args->converter->fromUnicodeStatus = 0x0000; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } + + sourceChar = *source++; + tempContextFromUnicode = converterData->contextCharFromUnicode; + + targetByteUnit = missingCharMarker; + + /*check if input is in ASCII and C0 control codes range*/ + if (sourceChar <= ASCII_END) { + args->converter->fromUnicodeStatus = sourceChar; + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,sourceChar,err); + if (U_FAILURE(*err)) { + break; + } + continue; + } + switch (sourceChar) { + case ZWNJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + converterData->contextCharFromUnicode = 0x00; + targetByteUnit = ISCII_HALANT; + } else { + /* consume ZWNJ and continue */ + converterData->contextCharFromUnicode = 0x00; + continue; + } + break; + case ZWJ: + /* contextChar has HALANT */ + if (converterData->contextCharFromUnicode) { + targetByteUnit = ISCII_NUKTA; + } else { + targetByteUnit =ISCII_INV; + } + converterData->contextCharFromUnicode = 0x00; + break; + default: + /* is the sourceChar in the INDIC_RANGE? */ + if ((uint16_t)(INDIC_BLOCK_END-sourceChar) <= INDIC_RANGE) { + /* Danda and Double Danda are valid in Northern scripts.. since Unicode + * does not include these codepoints in all Northern scrips we need to + * filter them out + */ + if (sourceChar!= DANDA && sourceChar != DOUBLE_DANDA) { + /* find out to which block the souceChar belongs*/ + range =(uint16_t)((sourceChar-INDIC_BLOCK_BEGIN)/DELTA); + newDelta =(uint16_t)(range*DELTA); + + /* Now are we in the same block as the previous? */ + if (newDelta!= converterData->currentDeltaFromUnicode || converterData->isFirstBuffer) { + converterData->currentDeltaFromUnicode = newDelta; + converterData->currentMaskFromUnicode = lookupInitialData[range].maskEnum; + deltaChanged =TRUE; + converterData->isFirstBuffer=FALSE; + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA) { + if (sourceChar == PNJ_TIPPI) { + /* Make sure Tippi is converterd to Bindi. */ + sourceChar = PNJ_BINDI; + } else if (sourceChar == PNJ_ADHAK) { + /* This is for consonant cluster handling. */ + converterData->contextCharFromUnicode = PNJ_ADHAK; + } + + } + /* Normalize all Indic codepoints to Devanagari and map them to ISCII */ + /* now subtract the new delta from sourceChar*/ + sourceChar -= converterData->currentDeltaFromUnicode; + } + + /* get the target byte unit */ + targetByteUnit=fromUnicodeTable[(uint8_t)sourceChar]; + + /* is the code point valid in current script? */ + if ((validityTable[(uint8_t)sourceChar] & converterData->currentMaskFromUnicode)==0) { + /* Vocallic RR is assigned in ISCII Telugu and Unicode */ + if (converterData->currentDeltaFromUnicode!=(TELUGU_DELTA) || sourceChar!=VOCALLIC_RR) { + targetByteUnit=missingCharMarker; + } + } + + if (deltaChanged) { + /* we are in a script block which is different than + * previous sourceChar's script block write ATR and language codes + */ + uint32_t temp=0; + temp =(uint16_t)(ATR<<8); + temp += (uint16_t)((uint8_t) lookupInitialData[range].isciiLang); + /* reset */ + deltaChanged=FALSE; + /* now append ATR and language code */ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,temp,err); + if (U_FAILURE(*err)) { + break; + } + } + + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && (sourceChar + PNJ_DELTA) == PNJ_ADHAK) { + continue; + } + } + /* reset context char */ + converterData->contextCharFromUnicode = 0x00; + break; + } + if (converterData->currentDeltaFromUnicode == PNJ_DELTA && tempContextFromUnicode == PNJ_ADHAK && isPNJConsonant((sourceChar + PNJ_DELTA))) { + /* If the previous codepoint is Adhak and the current codepoint is a consonant, the targetByteUnit should be C + Halant + C. */ + /* reset context char */ + converterData->contextCharFromUnicode = 0x0000; + targetByteUnit = targetByteUnit << 16 | ISCII_HALANT << 8 | targetByteUnit; + /* write targetByteUnit to target */ + WRITE_TO_TARGET_FROM_U(args, offsets, source, target, targetLimit, targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else if (targetByteUnit != missingCharMarker) { + if (targetByteUnit==ISCII_HALANT) { + converterData->contextCharFromUnicode = (UChar)targetByteUnit; + } + /* write targetByteUnit to target*/ + WRITE_TO_TARGET_FROM_U(args,offsets,source,target,targetLimit,targetByteUnit,err); + if (U_FAILURE(*err)) { + break; + } + } else { + /* oops.. the code point is unassigned */ + /*check if the char is a First surrogate*/ + if (U16_IS_SURROGATE(sourceChar)) { + if (U16_IS_SURROGATE_LEAD(sourceChar)) { +getTrail: + /*look ahead to find the trail surrogate*/ + if (source < sourceLimit) { + /* test the following code unit */ + UChar trail= (*source); + if (U16_IS_TRAIL(trail)) { + source++; + sourceChar=U16_GET_SUPPLEMENTARY(sourceChar, trail); + *err =U_INVALID_CHAR_FOUND; + /* convert this surrogate code point */ + /* exit this condition tree */ + } else { + /* this is an unmatched lead code unit (1st surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* no more input */ + *err = U_ZERO_ERROR; + } + } else { + /* this is an unmatched trail code unit (2nd surrogate) */ + /* callback(illegal) */ + *err=U_ILLEGAL_CHAR_FOUND; + } + } else { + /* callback(unassigned) for a BMP code point */ + *err = U_INVALID_CHAR_FOUND; + } + + args->converter->fromUChar32=sourceChar; + break; + } + }/* end while(mySourceIndex<mySourceLength) */ + + /*save the state and return */ + args->source = source; + args->target = (char*)target; +} + +static const uint16_t lookupTable[][2]={ + { ZERO, ZERO }, /*DEFALT*/ + { ZERO, ZERO }, /*ROMAN*/ + { DEVANAGARI, DEV_MASK }, + { BENGALI, BNG_MASK }, + { TAMIL, TML_MASK }, + { TELUGU, KND_MASK }, + { BENGALI, BNG_MASK }, + { ORIYA, ORI_MASK }, + { KANNADA, KND_MASK }, + { MALAYALAM, MLM_MASK }, + { GUJARATI, GJR_MASK }, + { GURMUKHI, PNJ_MASK } +}; + +#define WRITE_TO_TARGET_TO_U(args,source,target,offsets,offset,targetUniChar,delta, err) UPRV_BLOCK_MACRO_BEGIN { \ + /* add offset to current Indic Block */ \ + if(targetUniChar>ASCII_END && \ + targetUniChar != ZWJ && \ + targetUniChar != ZWNJ && \ + targetUniChar != DANDA && \ + targetUniChar != DOUBLE_DANDA){ \ + \ + targetUniChar+=(uint16_t)(delta); \ + } \ + /* now write the targetUniChar */ \ + if(target<args->targetLimit){ \ + *(target)++ = (UChar)targetUniChar; \ + if(offsets){ \ + *(offsets)++ = (int32_t)(offset); \ + } \ + }else{ \ + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++] = \ + (UChar)targetUniChar; \ + *err = U_BUFFER_OVERFLOW_ERROR; \ + } \ +} UPRV_BLOCK_MACRO_END + +#define GET_MAPPING(sourceChar,targetUniChar,data) UPRV_BLOCK_MACRO_BEGIN { \ + targetUniChar = toUnicodeTable[(sourceChar)] ; \ + /* is the code point valid in current script? */ \ + if(sourceChar> ASCII_END && \ + (validityTable[(targetUniChar & 0x7F)] & data->currentMaskToUnicode)==0){ \ + /* Vocallic RR is assigne in ISCII Telugu and Unicode */ \ + if(data->currentDeltaToUnicode!=(TELUGU_DELTA) || \ + targetUniChar!=VOCALLIC_RR){ \ + targetUniChar=missingCharMarker; \ + } \ + } \ +} UPRV_BLOCK_MACRO_END + +/*********** + * Rules for ISCII to Unicode converter + * ISCII is stateful encoding. To convert ISCII bytes to Unicode, + * which has both precomposed and decomposed forms characters + * pre-context and post-context need to be considered. + * + * Post context + * i) ATR : Attribute code is used to declare the font and script switching. + * Currently we only switch scripts and font codes consumed without generating an error + * ii) EXT : Extention code is used to declare switching to Sanskrit and for obscure, + * obsolete characters + * Pre context + * i) Halant: if preceeded by a halant then it is a explicit halant + * ii) Nukta : + * a) if preceeded by a halant then it is a soft halant + * b) if preceeded by specific consonants and the ligatures have pre-composed + * characters in Unicode then convert to pre-composed characters + * iii) Danda: If Danda is preceeded by a Danda then convert to Double Danda + * + */ + +static void U_CALLCONV +UConverter_toUnicode_ISCII_OFFSETS_LOGIC(UConverterToUnicodeArgs *args, UErrorCode* err) { + const char *source = ( char *) args->source; + UChar *target = args->target; + const char *sourceLimit = args->sourceLimit; + const UChar* targetLimit = args->targetLimit; + uint32_t targetUniChar = 0x0000; + uint8_t sourceChar = 0x0000; + UConverterDataISCII* data; + UChar32* toUnicodeStatus=NULL; + UChar32 tempTargetUniChar = 0x0000; + UChar* contextCharToUnicode= NULL; + UBool found; + int i; + int offset = 0; + + if ((args->converter == NULL) || (target < args->target) || (source < args->source)) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + return; + } + + data = (UConverterDataISCII*)(args->converter->extraInfo); + contextCharToUnicode = &data->contextCharToUnicode; /* contains previous ISCII codepoint visited */ + toUnicodeStatus = (UChar32*)&args->converter->toUnicodeStatus;/* contains the mapping to Unicode of the above codepoint*/ + + while (U_SUCCESS(*err) && source<sourceLimit) { + + targetUniChar = missingCharMarker; + + if (target < targetLimit) { + sourceChar = (unsigned char)*(source)++; + + /* look at the post-context preform special processing */ + if (*contextCharToUnicode==ATR) { + + /* If we have ATR in *contextCharToUnicode then we need to change our + * state to the Indic Script specified by sourceChar + */ + + /* check if the sourceChar is supported script range*/ + if ((uint8_t)(PNJ-sourceChar)<=PNJ-DEV) { + data->currentDeltaToUnicode = (uint16_t)(lookupTable[sourceChar & 0x0F][0] * DELTA); + data->currentMaskToUnicode = (MaskEnum)lookupTable[sourceChar & 0x0F][1]; + } else if (sourceChar==DEF) { + /* switch back to default */ + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + } else { + if ((sourceChar >= 0x21 && sourceChar <= 0x3F)) { + /* these are display codes consume and continue */ + } else { + *err =U_ILLEGAL_CHAR_FOUND; + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + goto CALLBACK; + } + } + + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + + continue; + + } else if (*contextCharToUnicode==EXT) { + /* check if sourceChar is in 0xA1-0xEE range */ + if ((uint8_t) (EXT_RANGE_END - sourceChar) <= (EXT_RANGE_END - EXT_RANGE_BEGIN)) { + /* We currently support only Anudatta and Devanagari abbreviation sign */ + if (sourceChar==0xBF || sourceChar == 0xB8) { + targetUniChar = (sourceChar==0xBF) ? DEV_ABBR_SIGN : DEV_ANUDATTA; + + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + *contextCharToUnicode= NO_CHAR_MARKER; + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + + continue; + } + } + /* byte unit is unassigned */ + targetUniChar = missingCharMarker; + *err= U_INVALID_CHAR_FOUND; + } else { + /* only 0xA1 - 0xEE are legal after EXT char */ + *contextCharToUnicode= NO_CHAR_MARKER; + *err = U_ILLEGAL_CHAR_FOUND; + } + goto CALLBACK; + } else if (*contextCharToUnicode==ISCII_INV) { + if (sourceChar==ISCII_HALANT) { + targetUniChar = 0x0020; /* replace with space accoding to Indic FAQ */ + } else { + targetUniChar = ZWJ; + } + + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* write to target */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + /* reset */ + *contextCharToUnicode=NO_CHAR_MARKER; + } + + /* look at the pre-context and perform special processing */ + switch (sourceChar) { + case ISCII_INV: + case EXT: + case ATR: + *contextCharToUnicode = (UChar)sourceChar; + + if (*toUnicodeStatus != missingCharMarker) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + continue; + case ISCII_DANDA: + /* handle double danda*/ + if (*contextCharToUnicode== ISCII_DANDA) { + targetUniChar = DOUBLE_DANDA; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case ISCII_HALANT: + /* handle explicit halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWNJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + } + break; + case 0x0A: + case 0x0D: + data->resetToDefaultToUnicode = TRUE; + GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + + case ISCII_VOWEL_SIGN_E: + i=1; + found=FALSE; + for (; i<vowelSignESpecialCases[0][0]; i++) { + U_ASSERT(i<UPRV_LENGTHOF(vowelSignESpecialCases)); + if (vowelSignESpecialCases[i][0]==(uint8_t)*contextCharToUnicode) { + targetUniChar=vowelSignESpecialCases[i][1]; + found=TRUE; + break; + } + } + if (found) { + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + break; + } + } + GET_MAPPING(sourceChar,targetUniChar,data); + *contextCharToUnicode = sourceChar; + break; + + case ISCII_NUKTA: + /* handle soft halant */ + if (*contextCharToUnicode == ISCII_HALANT) { + targetUniChar = ZWJ; + /* clear the context */ + *contextCharToUnicode = NO_CHAR_MARKER; + break; + } else if (data->currentDeltaToUnicode == PNJ_DELTA && data->contextCharToUnicode == 0xc0) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* We got here because ISCII_NUKTA was preceded by 0xc0 and we are converting Gurmukhi. + * In that case we must convert (0xc0 0xe9) to (\u0a5c\u0a4d\u0a39). + */ + targetUniChar = PNJ_RRA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_SIGN_VIRAMA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + if (U_SUCCESS(*err)) { + targetUniChar = PNJ_HA; + WRITE_TO_TARGET_TO_U(args, source, target, args->offsets, (source-args->source)-2, targetUniChar, 0, err); + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + } else { + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_SIGN_VIRAMA; + args->converter->UCharErrorBuffer[args->converter->UCharErrorBufferLength++]= PNJ_HA; + } + *toUnicodeStatus = missingCharMarker; + data->contextCharToUnicode = NO_CHAR_MARKER; + continue; + } else { + /* try to handle <CHAR> + ISCII_NUKTA special mappings */ + i=1; + found =FALSE; + for (; i<nuktaSpecialCases[0][0]; i++) { + if (nuktaSpecialCases[i][0]==(uint8_t) + *contextCharToUnicode) { + targetUniChar=nuktaSpecialCases[i][1]; + found =TRUE; + break; + } + } + if (found) { + /* find out if the mapping is valid in this state */ + if (validityTable[(uint8_t)targetUniChar] & data->currentMaskToUnicode) { + /*targetUniChar += data->currentDeltaToUnicode ;*/ + *contextCharToUnicode= NO_CHAR_MARKER; + *toUnicodeStatus = missingCharMarker; + if (data->currentDeltaToUnicode == PNJ_DELTA) { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),targetUniChar,data->currentDeltaToUnicode,err); + continue; + } + break; + } + /* else fall through to default */ + } + /* else fall through to default */ + U_FALLTHROUGH; + } + default:GET_MAPPING(sourceChar,targetUniChar,data) + ; + *contextCharToUnicode = sourceChar; + break; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* Check to make sure that consonant clusters are handled correct for Gurmukhi script. */ + if (data->currentDeltaToUnicode == PNJ_DELTA && data->prevToUnicodeStatus != 0 && isPNJConsonant(data->prevToUnicodeStatus) && + (*toUnicodeStatus + PNJ_DELTA) == PNJ_SIGN_VIRAMA && ((UChar32)(targetUniChar + PNJ_DELTA) == data->prevToUnicodeStatus)) { + /* Consonant clusters C + HALANT + C should be encoded as ADHAK + C */ + offset = (int)(source-args->source - 3); + tempTargetUniChar = PNJ_ADHAK; /* This is necessary to avoid some compiler warnings. */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,tempTargetUniChar,0,err); + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,offset,data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; /* reset the previous unicode code point */ + *toUnicodeStatus = missingCharMarker; + continue; + } else { + /* Write the previous toUnicodeStatus, this was delayed to handle consonant clustering for Gurmukhi script. */ + if (data->prevToUnicodeStatus) { + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -1),data->prevToUnicodeStatus,0,err); + data->prevToUnicodeStatus = 0x0000; + } + /* Check to make sure that Bindi and Tippi are handled correctly for Gurmukhi script. + * If 0xA2 is preceded by a codepoint in the PNJ_BINDI_TIPPI_SET then the target codepoint should be Tippi instead of Bindi. + */ + if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_BINDI && isPNJBindiTippi((*toUnicodeStatus + PNJ_DELTA))) { + targetUniChar = PNJ_TIPPI - PNJ_DELTA; + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,PNJ_DELTA,err); + } else if (data->currentDeltaToUnicode == PNJ_DELTA && (targetUniChar + PNJ_DELTA) == PNJ_SIGN_VIRAMA && isPNJConsonant((*toUnicodeStatus + PNJ_DELTA))) { + /* Store the current toUnicodeStatus code point for later handling of consonant cluster in Gurmukhi. */ + data->prevToUnicodeStatus = *toUnicodeStatus + PNJ_DELTA; + } else { + /* write the previously mapped codepoint */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source-args->source -2),*toUnicodeStatus,data->currentDeltaToUnicode,err); + } + } + *toUnicodeStatus = missingCharMarker; + } + + if (targetUniChar != missingCharMarker) { + /* now save the targetUniChar for delayed write */ + *toUnicodeStatus = (UChar) targetUniChar; + if (data->resetToDefaultToUnicode==TRUE) { + data->currentDeltaToUnicode = data->defDeltaToUnicode; + data->currentMaskToUnicode = data->defMaskToUnicode; + data->resetToDefaultToUnicode=FALSE; + } + } else { + + /* we reach here only if targetUniChar == missingCharMarker + * so assign codes to reason and err + */ + *err = U_INVALID_CHAR_FOUND; +CALLBACK: + args->converter->toUBytes[0] = (uint8_t) sourceChar; + args->converter->toULength = 1; + break; + } + + } else { + *err =U_BUFFER_OVERFLOW_ERROR; + break; + } + } + + if (U_SUCCESS(*err) && args->flush && source == sourceLimit) { + /* end of the input stream */ + UConverter *cnv = args->converter; + + if (*contextCharToUnicode==ATR || *contextCharToUnicode==EXT || *contextCharToUnicode==ISCII_INV) { + /* set toUBytes[] */ + cnv->toUBytes[0] = (uint8_t)*contextCharToUnicode; + cnv->toULength = 1; + + /* avoid looping on truncated sequences */ + *contextCharToUnicode = NO_CHAR_MARKER; + } else { + cnv->toULength = 0; + } + + if (*toUnicodeStatus != missingCharMarker) { + /* output a remaining target character */ + WRITE_TO_TARGET_TO_U(args,source,target,args->offsets,(source - args->source -1),*toUnicodeStatus,data->currentDeltaToUnicode,err); + *toUnicodeStatus = missingCharMarker; + } + } + + args->target = target; + args->source = source; +} + +/* structure for SafeClone calculations */ +struct cloneISCIIStruct { + UConverter cnv; + UConverterDataISCII mydata; +}; + +static UConverter * U_CALLCONV +_ISCII_SafeClone(const UConverter *cnv, + void *stackBuffer, + int32_t *pBufferSize, + UErrorCode *status) +{ + struct cloneISCIIStruct * localClone; + int32_t bufferSizeNeeded = sizeof(struct cloneISCIIStruct); + + if (U_FAILURE(*status)) { + return 0; + } + + if (*pBufferSize == 0) { /* 'preflighting' request - set needed size into *pBufferSize */ + *pBufferSize = bufferSizeNeeded; + return 0; + } + + localClone = (struct cloneISCIIStruct *)stackBuffer; + /* ucnv.c/ucnv_safeClone() copied the main UConverter already */ + + uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(UConverterDataISCII)); + localClone->cnv.extraInfo = &localClone->mydata; + localClone->cnv.isExtraLocal = TRUE; + + return &localClone->cnv; +} + +static void U_CALLCONV +_ISCIIGetUnicodeSet(const UConverter *cnv, + const USetAdder *sa, + UConverterUnicodeSet which, + UErrorCode *pErrorCode) +{ + (void)cnv; + (void)which; + (void)pErrorCode; + int32_t idx, script; + uint8_t mask; + + /* Since all ISCII versions allow switching to other ISCII + scripts, we add all roundtrippable characters to this set. */ + sa->addRange(sa->set, 0, ASCII_END); + for (script = DEVANAGARI; script <= MALAYALAM; script++) { + mask = (uint8_t)(lookupInitialData[script].maskEnum); + for (idx = 0; idx < DELTA; idx++) { + /* added check for TELUGU character */ + if ((validityTable[idx] & mask) || (script==TELUGU && idx==0x31)) { + sa->add(sa->set, idx + (script * DELTA) + INDIC_BLOCK_BEGIN); + } + } + } + sa->add(sa->set, DANDA); + sa->add(sa->set, DOUBLE_DANDA); + sa->add(sa->set, ZWNJ); + sa->add(sa->set, ZWJ); +} +U_CDECL_END +static const UConverterImpl _ISCIIImpl={ + + UCNV_ISCII, + + NULL, + NULL, + + _ISCIIOpen, + _ISCIIClose, + _ISCIIReset, + + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_toUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + UConverter_fromUnicode_ISCII_OFFSETS_LOGIC, + NULL, + + NULL, + _ISCIIgetName, + NULL, + _ISCII_SafeClone, + _ISCIIGetUnicodeSet, + NULL, + NULL +}; + +static const UConverterStaticData _ISCIIStaticData={ + sizeof(UConverterStaticData), + "ISCII", + 0, + UCNV_IBM, + UCNV_ISCII, + 1, + 4, + { 0x1a, 0, 0, 0 }, + 0x1, + FALSE, + FALSE, + 0x0, + 0x0, + { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }, /* reserved */ + +}; + +const UConverterSharedData _ISCIIData= + UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ISCIIStaticData, &_ISCIIImpl); + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |