diff options
author | iseg <iseg@yandex-team.ru> | 2022-02-10 16:49:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:39 +0300 |
commit | 8b71ce88bea710a9663bb143e4916f961c57212e (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /util/charset/unidata.h | |
parent | f828a15ab90e9ca8e848f83caf95c95f06be46e7 (diff) | |
download | ydb-8b71ce88bea710a9663bb143e4916f961c57212e.tar.gz |
Restoring authorship annotation for <iseg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'util/charset/unidata.h')
-rw-r--r-- | util/charset/unidata.h | 130 |
1 files changed, 65 insertions, 65 deletions
diff --git a/util/charset/unidata.h b/util/charset/unidata.h index 32014a8fa0..400d314186 100644 --- a/util/charset/unidata.h +++ b/util/charset/unidata.h @@ -1,9 +1,9 @@ #pragma once - + #include "unicode_table.h" #include <util/system/defaults.h> // wchar32, ui64, ULL() - + enum WC_TYPE { // TODO move no NUnicode Lu_UPPER = 1, // 'Ъ' Ll_LOWER = 2, // 'ъ' @@ -17,35 +17,35 @@ enum WC_TYPE { // TODO move no NUnicode Lo_LEADING = 10, // '?' Lo_VOWEL = 11, // '?' Lo_TRAILING = 12, // '?' - + Mn_NONSPACING = 13, // '`' Me_ENCLOSING = 14, // '`' Mc_SPACING = 15, // '`' - + Nd_DIGIT = 16, // '9' // convert to digit Nl_LETTER = 17, // 'X' // X,V,C,L,I ... - Nl_IDEOGRAPH = 18, // '?' + Nl_IDEOGRAPH = 18, // '?' No_OTHER = 19, // '9' - + Zs_SPACE = 20, // ' ' [\40\240] SPACE ... NO-BREAK SPACE (00A0) Zs_ZWSPACE = 21, // ' ' // nothing ? Zl_LINE = 22, // '\n' - Zp_PARAGRAPH = 23, // '\n' - + Zp_PARAGRAPH = 23, // '\n' + Cc_ASCII = 24, // '\x1A' // can not happen Cc_SPACE = 25, // '\x1A' // can not happen - Cc_SEPARATOR = 26, // '\x1A' // can not happen - + Cc_SEPARATOR = 26, // '\x1A' // can not happen + Cf_FORMAT = 27, // '\x1A' // nothing ? Cf_JOIN = 28, // '\x1A' // nothing ? Cf_BIDI = 29, // '\x1A' // nothing ? Cf_ZWNBSP = 30, // '\x1A' // nothing ? - + Cn_UNASSIGNED = 0, // '?' Co_PRIVATE = 0, // '?' Cs_LOW = 31, // '?' Cs_HIGH = 32, // '?' - + Pd_DASH = 33, // '-' Pd_HYPHEN = 34, // '-' [-] HYPHEN-MINUS Ps_START = 35, // '(' [([{] LEFT PARENTHESIS ... LEFT CURLY BRACKET @@ -60,7 +60,7 @@ enum WC_TYPE { // TODO move no NUnicode Po_TERMINAL = 44, // '.' [!,.:;?] EXCLAMATION MARK ... QUESTION MARK Po_EXTENDER = 45, // '-' [№] MIDDLE DOT (00B7) Po_HYPHEN = 46, // '-' - + Sm_MATH = 47, // '=' [+<=>|~] PLUS SIGN ... TILDE Sm_MINUS = 48, // '-' Sc_CURRENCY = 49, // '$' [$] DOLLAR SIGN @@ -75,25 +75,25 @@ enum WC_TYPE { // TODO move no NUnicode CCL_NUM = 57, CCL_MASK = 0x3F, - + IS_ASCII_XDIGIT = 1 << 6, IS_DIGIT = 1 << 7, IS_NONBREAK = 1 << 8, - + IS_PRIVATE = 1 << 9, - + IS_COMPAT = 1 << 10, IS_CANON = 1 << 11, - + NFD_QC = 1 << 12, NFC_QC = 1 << 13, NFKD_QC = 1 << 14, NFKC_QC = 1 << 15, - + BIDI_OFFSET = 16, SVAL_OFFSET = 22, -}; - +}; + const size_t DEFCHAR_BUF = 58; // CCL_NUM + 1 #define SHIFT(i) (ULL(1) << (i)) @@ -144,17 +144,17 @@ namespace NUnicode { } } -// all usefull properties +// all usefull properties inline bool IsComposed(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & (IS_COMPAT | IS_CANON); -} +} inline bool IsCanonComposed(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & IS_CANON; -} +} inline bool IsCompatComposed(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & IS_COMPAT; -} +} inline bool IsWhitespace(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cc_SPACE) | SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE) | SHIFT(Zl_LINE) | SHIFT(Zp_PARAGRAPH)); @@ -164,42 +164,42 @@ inline bool IsAsciiCntrl(wchar32 ch) { } inline bool IsBidiCntrl(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_BIDI)); -} +} inline bool IsJoinCntrl(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_JOIN)); -} +} inline bool IsFormatCntrl(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT)); -} +} inline bool IsIgnorableCntrl(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP)); -} +} inline bool IsCntrl(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Cc_ASCII) | SHIFT(Cc_SPACE) | SHIFT(Cc_SEPARATOR)); -} +} inline bool IsZerowidth(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Zs_ZWSPACE)); -} +} inline bool IsLineSep(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Zl_LINE)); -} +} inline bool IsParaSep(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Zp_PARAGRAPH)); -} +} inline bool IsDash(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Pd_DASH) | SHIFT(Pd_HYPHEN) | SHIFT(Sm_MINUS)); -} +} inline bool IsHyphen(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Pd_HYPHEN) | SHIFT(Po_HYPHEN)); -} +} inline bool IsQuotation(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Po_QUOTE) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE) | SHIFT(Po_SINGLE_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) | SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE)); -} +} inline bool IsSingleQuotation(wchar32 ch) { return NUnicode::CharHasType(ch, @@ -209,100 +209,100 @@ inline bool IsSingleQuotation(wchar32 ch) { inline bool IsTerminal(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Po_TERMINAL)); -} +} inline bool IsPairedPunct(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Pe_END) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) | SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE)); -} +} inline bool IsLeftPunct(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Ps_QUOTE) | SHIFT(Ps_SINGLE_QUOTE)); -} +} inline bool IsRightPunct(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Pe_END) | SHIFT(Pe_QUOTE) | SHIFT(Pe_SINGLE_QUOTE)); -} +} inline bool IsCombining(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Mc_SPACING) | SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); -} +} inline bool IsNonspacing(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING)); -} +} inline bool IsAlphabetic(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lu_UPPER) | SHIFT(Ll_LOWER) | SHIFT(Lt_TITLE) | SHIFT(Lm_EXTENDER) | SHIFT(Lm_LETTER) | SHIFT(Lo_OTHER) | SHIFT(Nl_LETTER)); -} +} inline bool IsIdeographic(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_IDEOGRAPH) | SHIFT(Nl_IDEOGRAPH)); -} +} inline bool IsKatakana(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_KATAKANA)); -} +} inline bool IsHiragana(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_HIRAGANA)); -} +} inline bool IsHangulLeading(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_LEADING)); -} +} inline bool IsHangulVowel(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_VOWEL)); -} +} inline bool IsHangulTrailing(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Lo_TRAILING)); -} +} inline bool IsHexdigit(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & IS_ASCII_XDIGIT; -} +} inline bool IsDecdigit(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT)); -} +} inline bool IsNumeric(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT) | SHIFT(Nl_LETTER) | SHIFT(Nl_IDEOGRAPH) | SHIFT(No_OTHER)); -} +} inline bool IsCurrency(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Sc_CURRENCY)); -} +} inline bool IsMath(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Sm_MATH)); -} +} inline bool IsSymbol(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Sm_MATH) | SHIFT(Sm_MINUS) | SHIFT(Sc_CURRENCY) | SHIFT(Sk_MODIFIER) | SHIFT(So_OTHER)); -} +} inline bool IsLowSurrogate(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cs_LOW)); -} +} inline bool IsHighSurrogate(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); -} +} inline bool IsNonbreak(wchar32 ch) { return NUnicode::NPrivate::CharInfo(ch) & IS_NONBREAK; -} +} inline bool IsPrivate(wchar32 ch) { return (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE) && !NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)); -} +} inline bool IsUnassigned(wchar32 ch) { return (NUnicode::CharType(ch) == 0) && !(NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); -} +} inline bool IsPrivateHighSurrogate(wchar32 ch) { return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)) && (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE); -} +} -// transformations +// transformations inline wchar32 ToLower(wchar32 ch) { return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Lower); -} +} inline wchar32 ToUpper(wchar32 ch) { return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Upper); -} +} inline wchar32 ToTitle(wchar32 ch) { return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Title); -} +} inline int ToDigit(wchar32 ch) { ui32 i = NUnicode::NPrivate::CharInfo(ch); return (i & IS_DIGIT) ? static_cast<int>(i >> SVAL_OFFSET) : -1; -} +} // BIDI properties |