diff options
author | sereglond <sereglond@yandex-team.ru> | 2022-02-10 16:47:47 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:47 +0300 |
commit | 73bb02f2495181e0719a800f979df508924f4b71 (patch) | |
tree | c0748b5dcbade83af788c0abfa89c0383d6b779c /util/charset/unidata.h | |
parent | eb3d925534734c808602b31b38b953677f0a279f (diff) | |
download | ydb-73bb02f2495181e0719a800f979df508924f4b71.tar.gz |
Restoring authorship annotation for <sereglond@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'util/charset/unidata.h')
-rw-r--r-- | util/charset/unidata.h | 426 |
1 files changed, 213 insertions, 213 deletions
diff --git a/util/charset/unidata.h b/util/charset/unidata.h index f5ed0b5be1..400d314186 100644 --- a/util/charset/unidata.h +++ b/util/charset/unidata.h @@ -77,18 +77,18 @@ enum WC_TYPE { // TODO move no NUnicode CCL_MASK = 0x3F, IS_ASCII_XDIGIT = 1 << 6, - IS_DIGIT = 1 << 7, - IS_NONBREAK = 1 << 8, + IS_DIGIT = 1 << 7, + IS_NONBREAK = 1 << 8, - IS_PRIVATE = 1 << 9, + IS_PRIVATE = 1 << 9, - IS_COMPAT = 1 << 10, - IS_CANON = 1 << 11, + IS_COMPAT = 1 << 10, + IS_CANON = 1 << 11, - NFD_QC = 1 << 12, - NFC_QC = 1 << 13, - NFKD_QC = 1 << 14, - NFKC_QC = 1 << 15, + NFD_QC = 1 << 12, + NFC_QC = 1 << 13, + NFKD_QC = 1 << 14, + NFKC_QC = 1 << 15, BIDI_OFFSET = 16, SVAL_OFFSET = 22, @@ -97,104 +97,104 @@ enum WC_TYPE { // TODO move no NUnicode const size_t DEFCHAR_BUF = 58; // CCL_NUM + 1 #define SHIFT(i) (ULL(1) << (i)) - -namespace NUnicode { + +namespace NUnicode { using TCombining = ui8; - - namespace NPrivate { - struct TProperty { - ui32 Info; - i32 Lower; - i32 Upper; - i32 Title; - TCombining Combining; - }; - + + namespace NPrivate { + struct TProperty { + ui32 Info; + i32 Lower; + i32 Upper; + i32 Title; + TCombining Combining; + }; + extern const size_t DEFAULT_KEY; using TUnidataTable = NUnicodeTable::TTable<NUnicodeTable::TSubtable<NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<TProperty>>>; - const TUnidataTable& UnidataTable(); + const TUnidataTable& UnidataTable(); - inline const TProperty& CharProperty(wchar32 ch) { + inline const TProperty& CharProperty(wchar32 ch) { return UnidataTable().Get(ch, DEFAULT_KEY); - } - - inline ui32 CharInfo(wchar32 ch) { - return CharProperty(ch).Info; - } - - inline bool IsBidi(wchar32 ch, ui32 type) { - return ((NUnicode::NPrivate::CharInfo(ch) >> BIDI_OFFSET) & 15) == type; - } + } + + inline ui32 CharInfo(wchar32 ch) { + return CharProperty(ch).Info; + } + + inline bool IsBidi(wchar32 ch, ui32 type) { + return ((NUnicode::NPrivate::CharInfo(ch) >> BIDI_OFFSET) & 15) == type; + } + } + + inline size_t UnicodeInstancesLimit() { + return NPrivate::UnidataTable().Size(); + } + + inline TCombining DecompositionCombining(wchar32 ch) { + return NPrivate::CharProperty(ch).Combining; + } + + inline WC_TYPE CharType(wchar32 ch) { + return (WC_TYPE)(NUnicode::NPrivate::CharInfo(ch) & CCL_MASK); + } + inline bool CharHasType(wchar32 ch, ui64 type_bits) { + return (SHIFT(NUnicode::CharType(ch)) & type_bits) != 0; } - - inline size_t UnicodeInstancesLimit() { - return NPrivate::UnidataTable().Size(); - } - - inline TCombining DecompositionCombining(wchar32 ch) { - return NPrivate::CharProperty(ch).Combining; - } - - inline WC_TYPE CharType(wchar32 ch) { - return (WC_TYPE)(NUnicode::NPrivate::CharInfo(ch) & CCL_MASK); - } - inline bool CharHasType(wchar32 ch, ui64 type_bits) { - return (SHIFT(NUnicode::CharType(ch)) & type_bits) != 0; - } -} - +} + // all usefull properties -inline bool IsComposed(wchar32 ch) { - return NUnicode::NPrivate::CharInfo(ch) & (IS_COMPAT | IS_CANON); +inline bool IsComposed(wchar32 ch) { + return NUnicode::NPrivate::CharInfo(ch) & (IS_COMPAT | IS_CANON); } -inline bool IsCanonComposed(wchar32 ch) { - return NUnicode::NPrivate::CharInfo(ch) & IS_CANON; +inline bool IsCanonComposed(wchar32 ch) { + return NUnicode::NPrivate::CharInfo(ch) & IS_CANON; +} +inline bool IsCompatComposed(wchar32 ch) { + return NUnicode::NPrivate::CharInfo(ch) & IS_COMPAT; +} + +inline bool IsWhitespace(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cc_SPACE) | SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE) | SHIFT(Zl_LINE) | SHIFT(Zp_PARAGRAPH)); } -inline bool IsCompatComposed(wchar32 ch) { - return NUnicode::NPrivate::CharInfo(ch) & IS_COMPAT; +inline bool IsAsciiCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cc_ASCII) | SHIFT(Cc_SPACE) | SHIFT(Cc_SEPARATOR)); } - -inline bool IsWhitespace(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cc_SPACE) | SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE) | SHIFT(Zl_LINE) | SHIFT(Zp_PARAGRAPH)); -} -inline bool IsAsciiCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cc_ASCII) | SHIFT(Cc_SPACE) | SHIFT(Cc_SEPARATOR)); -} -inline bool IsBidiCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cf_BIDI)); +inline bool IsBidiCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_BIDI)); } -inline bool IsJoinCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cf_JOIN)); +inline bool IsJoinCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_JOIN)); } -inline bool IsFormatCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT)); +inline bool IsFormatCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT)); } -inline bool IsIgnorableCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP)); +inline bool IsIgnorableCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP)); } -inline bool IsCntrl(wchar32 ch) { - return NUnicode::CharHasType(ch, +inline bool IsCntrl(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Cc_ASCII) | SHIFT(Cc_SPACE) | SHIFT(Cc_SEPARATOR)); } -inline bool IsZerowidth(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Zs_ZWSPACE)); +inline bool IsZerowidth(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Zs_ZWSPACE)); } -inline bool IsLineSep(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Zl_LINE)); +inline bool IsLineSep(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Zl_LINE)); } -inline bool IsParaSep(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Zp_PARAGRAPH)); +inline bool IsParaSep(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Zp_PARAGRAPH)); } -inline bool IsDash(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Pd_DASH) | SHIFT(Pd_HYPHEN) | SHIFT(Sm_MINUS)); +inline bool IsDash(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Pd_DASH) | SHIFT(Pd_HYPHEN) | SHIFT(Sm_MINUS)); } -inline bool IsHyphen(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Pd_HYPHEN) | SHIFT(Po_HYPHEN)); +inline bool IsHyphen(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Pd_HYPHEN) | SHIFT(Po_HYPHEN)); } -inline bool IsQuotation(wchar32 ch) { +inline bool IsQuotation(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Po_QUOTE) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE) | SHIFT(Po_SINGLE_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) | @@ -207,180 +207,180 @@ inline bool IsSingleQuotation(wchar32 ch) { SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE)); } -inline bool IsTerminal(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Po_TERMINAL)); +inline bool IsTerminal(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Po_TERMINAL)); } -inline bool IsPairedPunct(wchar32 ch) { +inline bool IsPairedPunct(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Pe_END) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) | SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE)); } -inline bool IsLeftPunct(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Ps_QUOTE) | SHIFT(Ps_SINGLE_QUOTE)); +inline bool IsLeftPunct(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Ps_QUOTE) | SHIFT(Ps_SINGLE_QUOTE)); } -inline bool IsRightPunct(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Pe_END) | SHIFT(Pe_QUOTE) | SHIFT(Pe_SINGLE_QUOTE)); +inline bool IsRightPunct(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Pe_END) | SHIFT(Pe_QUOTE) | SHIFT(Pe_SINGLE_QUOTE)); } -inline bool IsCombining(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Mc_SPACING) | SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); +inline bool IsCombining(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Mc_SPACING) | SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); } -inline bool IsNonspacing(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); +inline bool IsNonspacing(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); } -inline bool IsAlphabetic(wchar32 ch) { +inline bool IsAlphabetic(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER) | SHIFT(Ll_LOWER) | SHIFT(Lt_TITLE) | SHIFT(Lm_EXTENDER) | SHIFT(Lm_LETTER) | SHIFT(Lo_OTHER) | SHIFT(Nl_LETTER)); } -inline bool IsIdeographic(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_IDEOGRAPH) | SHIFT(Nl_IDEOGRAPH)); +inline bool IsIdeographic(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_IDEOGRAPH) | SHIFT(Nl_IDEOGRAPH)); } -inline bool IsKatakana(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_KATAKANA)); +inline bool IsKatakana(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_KATAKANA)); } -inline bool IsHiragana(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_HIRAGANA)); +inline bool IsHiragana(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_HIRAGANA)); } -inline bool IsHangulLeading(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_LEADING)); +inline bool IsHangulLeading(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_LEADING)); } -inline bool IsHangulVowel(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_VOWEL)); +inline bool IsHangulVowel(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_VOWEL)); } -inline bool IsHangulTrailing(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lo_TRAILING)); +inline bool IsHangulTrailing(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lo_TRAILING)); } -inline bool IsHexdigit(wchar32 ch) { +inline bool IsHexdigit(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & IS_ASCII_XDIGIT; } -inline bool IsDecdigit(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT)); +inline bool IsDecdigit(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT)); } -inline bool IsNumeric(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT) | SHIFT(Nl_LETTER) | SHIFT(Nl_IDEOGRAPH) | SHIFT(No_OTHER)); +inline bool IsNumeric(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT) | SHIFT(Nl_LETTER) | SHIFT(Nl_IDEOGRAPH) | SHIFT(No_OTHER)); } -inline bool IsCurrency(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Sc_CURRENCY)); +inline bool IsCurrency(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Sc_CURRENCY)); } -inline bool IsMath(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Sm_MATH)); +inline bool IsMath(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Sm_MATH)); } -inline bool IsSymbol(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Sm_MATH) | SHIFT(Sm_MINUS) | SHIFT(Sc_CURRENCY) | SHIFT(Sk_MODIFIER) | SHIFT(So_OTHER)); +inline bool IsSymbol(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Sm_MATH) | SHIFT(Sm_MINUS) | SHIFT(Sc_CURRENCY) | SHIFT(Sk_MODIFIER) | SHIFT(So_OTHER)); } -inline bool IsLowSurrogate(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cs_LOW)); +inline bool IsLowSurrogate(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cs_LOW)); } -inline bool IsHighSurrogate(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); +inline bool IsHighSurrogate(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); } -inline bool IsNonbreak(wchar32 ch) { - return NUnicode::NPrivate::CharInfo(ch) & IS_NONBREAK; +inline bool IsNonbreak(wchar32 ch) { + return NUnicode::NPrivate::CharInfo(ch) & IS_NONBREAK; } -inline bool IsPrivate(wchar32 ch) { - return (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE) && !NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); +inline bool IsPrivate(wchar32 ch) { + return (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE) && !NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); } -inline bool IsUnassigned(wchar32 ch) { - return (NUnicode::CharType(ch) == 0) && !(NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); +inline bool IsUnassigned(wchar32 ch) { + return (NUnicode::CharType(ch) == 0) && !(NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); } -inline bool IsPrivateHighSurrogate(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)) && (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); +inline bool IsPrivateHighSurrogate(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)) && (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); } // transformations -inline wchar32 ToLower(wchar32 ch) { - return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Lower); +inline wchar32 ToLower(wchar32 ch) { + return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Lower); } -inline wchar32 ToUpper(wchar32 ch) { - return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Upper); +inline wchar32 ToUpper(wchar32 ch) { + return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Upper); } -inline wchar32 ToTitle(wchar32 ch) { - return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Title); +inline wchar32 ToTitle(wchar32 ch) { + return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Title); } - -inline int ToDigit(wchar32 ch) { - ui32 i = NUnicode::NPrivate::CharInfo(ch); + +inline int ToDigit(wchar32 ch) { + ui32 i = NUnicode::NPrivate::CharInfo(ch); return (i & IS_DIGIT) ? static_cast<int>(i >> SVAL_OFFSET) : -1; } -// BIDI properties - -inline bool IsBidiLeft(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 1); -} -inline bool IsBidiRight(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 2); -} -inline bool IsBidiEuronum(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 3); -} -inline bool IsBidiEurosep(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 4); -} -inline bool IsBidiEuroterm(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 5); -} -inline bool IsBidiArabnum(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 6); -} -inline bool IsBidiCommsep(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 7); -} -inline bool IsBidiBlocksep(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 8); -} -inline bool IsBidiSegmsep(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 9); -} -inline bool IsBidiSpace(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 10); -} -inline bool IsBidiNeutral(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 11); -} -inline bool IsBidiNotappl(wchar32 ch) { - return NUnicode::NPrivate::IsBidi(ch, 0); -} - -inline bool IsSpace(wchar32 ch) { - return IsWhitespace(ch); -} -inline bool IsLower(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Ll_LOWER)); -} -inline bool IsUpper(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER)); -} +// BIDI properties + +inline bool IsBidiLeft(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 1); +} +inline bool IsBidiRight(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 2); +} +inline bool IsBidiEuronum(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 3); +} +inline bool IsBidiEurosep(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 4); +} +inline bool IsBidiEuroterm(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 5); +} +inline bool IsBidiArabnum(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 6); +} +inline bool IsBidiCommsep(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 7); +} +inline bool IsBidiBlocksep(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 8); +} +inline bool IsBidiSegmsep(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 9); +} +inline bool IsBidiSpace(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 10); +} +inline bool IsBidiNeutral(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 11); +} +inline bool IsBidiNotappl(wchar32 ch) { + return NUnicode::NPrivate::IsBidi(ch, 0); +} + +inline bool IsSpace(wchar32 ch) { + return IsWhitespace(ch); +} +inline bool IsLower(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Ll_LOWER)); +} +inline bool IsUpper(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER)); +} inline bool IsTitle(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lt_TITLE)); } -inline bool IsAlpha(wchar32 ch) { - return NUnicode::CharHasType(ch, +inline bool IsAlpha(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER) | SHIFT(Ll_LOWER) | SHIFT(Lt_TITLE) | SHIFT(Lm_LETTER) | SHIFT(Lm_EXTENDER) | SHIFT(Lo_OTHER) | SHIFT(Lo_IDEOGRAPH) | SHIFT(Lo_KATAKANA) | SHIFT(Lo_HIRAGANA) | SHIFT(Lo_LEADING) | SHIFT(Lo_VOWEL) | SHIFT(Lo_TRAILING)); -} -inline bool IsAlnum(wchar32 ch) { - return NUnicode::CharHasType(ch, +} +inline bool IsAlnum(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER) | SHIFT(Ll_LOWER) | SHIFT(Lt_TITLE) | SHIFT(Lm_LETTER) | SHIFT(Lm_EXTENDER) | SHIFT(Lo_OTHER) | SHIFT(Lo_IDEOGRAPH) | SHIFT(Lo_KATAKANA) | SHIFT(Lo_HIRAGANA) | SHIFT(Lo_LEADING) | SHIFT(Lo_VOWEL) | SHIFT(Lo_TRAILING) | SHIFT(Nd_DIGIT) | SHIFT(Nl_LETTER) | SHIFT(Nl_IDEOGRAPH) | SHIFT(No_OTHER)); -} -inline bool IsPunct(wchar32 ch) { - return NUnicode::CharHasType(ch, +} +inline bool IsPunct(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Pd_DASH) | SHIFT(Pd_HYPHEN) | SHIFT(Ps_START) | SHIFT(Ps_QUOTE) | SHIFT(Pe_END) | SHIFT(Pe_QUOTE) | SHIFT(Pc_CONNECTOR) | SHIFT(Po_OTHER) | SHIFT(Po_QUOTE) | SHIFT(Po_TERMINAL) | SHIFT(Po_EXTENDER) | SHIFT(Po_HYPHEN) | SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE)); -} -inline bool IsXdigit(wchar32 ch) { - return IsHexdigit(ch); -} -inline bool IsDigit(wchar32 ch) { - return IsDecdigit(ch); -} +} +inline bool IsXdigit(wchar32 ch) { + return IsHexdigit(ch); +} +inline bool IsDigit(wchar32 ch) { + return IsDecdigit(ch); +} inline bool IsCommonDigit(wchar32 ch) { // IsDigit returns true for some exotic symbols like "VAI DIGIT TWO" (U+A622) @@ -390,16 +390,16 @@ inline bool IsCommonDigit(wchar32 ch) { return ch >= ZERO && ch <= NINE; } -inline bool IsGraph(wchar32 ch) { - return IsAlnum(ch) || IsPunct(ch) || IsSymbol(ch); -} -inline bool IsBlank(wchar32 ch) { - return NUnicode::CharHasType(ch, SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE)) || ch == '\t'; -} -inline bool IsPrint(wchar32 ch) { - return IsAlnum(ch) || IsPunct(ch) || IsSymbol(ch) || IsBlank(ch); -} - +inline bool IsGraph(wchar32 ch) { + return IsAlnum(ch) || IsPunct(ch) || IsSymbol(ch); +} +inline bool IsBlank(wchar32 ch) { + return NUnicode::CharHasType(ch, SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE)) || ch == '\t'; +} +inline bool IsPrint(wchar32 ch) { + return IsAlnum(ch) || IsPunct(ch) || IsSymbol(ch) || IsBlank(ch); +} + inline bool IsRomanDigit(wchar32 ch) { if (NUnicode::CharHasType(ch, SHIFT(Nl_LETTER)) && 0x2160 <= ch && ch <= 0x2188) return true; @@ -418,4 +418,4 @@ inline bool IsRomanDigit(wchar32 ch) { return false; } -#undef SHIFT +#undef SHIFT |