about | summary | refs | log | tree | commit | diff | stats
path: root/util/charset/unidata.h
diff options
context:
space:
mode:
author iseg <iseg@yandex-team.ru> 2022-02-10 16:49:39 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:39 +0300
commit 8b71ce88bea710a9663bb143e4916f961c57212e (patch)
tree 5d5cb817648f650d76cf1076100726fd9b8448e8 /util/charset/unidata.h
parent f828a15ab90e9ca8e848f83caf95c95f06be46e7 (diff)
download ydb-8b71ce88bea710a9663bb143e4916f961c57212e.tar.gz
Restoring authorship annotation for <iseg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'util/charset/unidata.h')
-rw-r--r-- util/charset/unidata.h 130
1 files changed, 65 insertions, 65 deletions
diff --git a/util/charset/unidata.h b/util/charset/unidata.h
index 32014a8fa0..400d314186 100644
--- a/util/charset/unidata.h
+++ b/util/charset/unidata.h
@@ -1,9 +1,9 @@
#pragma once
-
+
#include "unicode_table.h"
#include <util/system/defaults.h> // wchar32, ui64, ULL()
-
+
enum WC_TYPE { // TODO move no NUnicode
Lu_UPPER = 1, // 'Ъ'
Ll_LOWER = 2, // 'ъ'
@@ -17,35 +17,35 @@ enum WC_TYPE { // TODO move no NUnicode
Lo_LEADING = 10, // '?'
Lo_VOWEL = 11, // '?'
Lo_TRAILING = 12, // '?'
-
+
Mn_NONSPACING = 13, // '`'
Me_ENCLOSING = 14, // '`'
Mc_SPACING = 15, // '`'
-
+
Nd_DIGIT = 16, // '9' // convert to digit
Nl_LETTER = 17, // 'X' // X,V,C,L,I ...
- Nl_IDEOGRAPH = 18, // '?'
+ Nl_IDEOGRAPH = 18, // '?'
No_OTHER = 19, // '9'
-
+
Zs_SPACE = 20, // ' ' [\40\240] SPACE ... NO-BREAK SPACE (00A0)
Zs_ZWSPACE = 21, // ' ' // nothing ?
Zl_LINE = 22, // '\n'
- Zp_PARAGRAPH = 23, // '\n'
-
+ Zp_PARAGRAPH = 23, // '\n'
+
Cc_ASCII = 24, // '\x1A' // can not happen
Cc_SPACE = 25, // '\x1A' // can not happen
- Cc_SEPARATOR = 26, // '\x1A' // can not happen
-
+ Cc_SEPARATOR = 26, // '\x1A' // can not happen
+
Cf_FORMAT = 27, // '\x1A' // nothing ?
Cf_JOIN = 28, // '\x1A' // nothing ?
Cf_BIDI = 29, // '\x1A' // nothing ?
Cf_ZWNBSP = 30, // '\x1A' // nothing ?
-
+
Cn_UNASSIGNED = 0, // '?'
Co_PRIVATE = 0, // '?'
Cs_LOW = 31, // '?'
Cs_HIGH = 32, // '?'
-
+
Pd_DASH = 33, // '-'
Pd_HYPHEN = 34, // '-' [-] HYPHEN-MINUS
Ps_START = 35, // '(' [([{] LEFT PARENTHESIS ... LEFT CURLY BRACKET
@@ -60,7 +60,7 @@ enum WC_TYPE { // TODO move no NUnicode
Po_TERMINAL = 44, // '.' [!,.:;?] EXCLAMATION MARK ... QUESTION MARK
Po_EXTENDER = 45, // '-' [№] MIDDLE DOT (00B7)
Po_HYPHEN = 46, // '-'
-
+
Sm_MATH = 47, // '=' [+<=>|~] PLUS SIGN ... TILDE
Sm_MINUS = 48, // '-'
Sc_CURRENCY = 49, // '$' [$] DOLLAR SIGN
@@ -75,25 +75,25 @@ enum WC_TYPE { // TODO move no NUnicode
CCL_NUM = 57,
CCL_MASK = 0x3F,
-
+
IS_ASCII_XDIGIT = 1 << 6,
IS_DIGIT = 1 << 7,
IS_NONBREAK = 1 << 8,
-
+
IS_PRIVATE = 1 << 9,
-
+
IS_COMPAT = 1 << 10,
IS_CANON = 1 << 11,
-
+
NFD_QC = 1 << 12,
NFC_QC = 1 << 13,
NFKD_QC = 1 << 14,
NFKC_QC = 1 << 15,
-
+
BIDI_OFFSET = 16,
SVAL_OFFSET = 22,
-};
-
+};
+
const size_t DEFCHAR_BUF = 58; // CCL_NUM + 1
#define SHIFT(i) (ULL(1) << (i))
@@ -144,17 +144,17 @@ namespace NUnicode {
}
}
-// all usefull properties
+// all usefull properties
inline bool IsComposed(wchar32 ch) {
return NUnicode::NPrivate::CharInfo(ch) & (IS_COMPAT | IS_CANON);
-}
+}
inline bool IsCanonComposed(wchar32 ch) {
return NUnicode::NPrivate::CharInfo(ch) & IS_CANON;
-}
+}
inline bool IsCompatComposed(wchar32 ch) {
return NUnicode::NPrivate::CharInfo(ch) & IS_COMPAT;
-}
+}
inline bool IsWhitespace(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cc_SPACE) | SHIFT(Zs_SPACE) | SHIFT(Zs_ZWSPACE) | SHIFT(Zl_LINE) | SHIFT(Zp_PARAGRAPH));
@@ -164,42 +164,42 @@ inline bool IsAsciiCntrl(wchar32 ch) {
}
inline bool IsBidiCntrl(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cf_BIDI));
-}
+}
inline bool IsJoinCntrl(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cf_JOIN));
-}
+}
inline bool IsFormatCntrl(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT));
-}
+}
inline bool IsIgnorableCntrl(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP));
-}
+}
inline bool IsCntrl(wchar32 ch) {
return NUnicode::CharHasType(ch,
SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) |
SHIFT(Cc_ASCII) | SHIFT(Cc_SPACE) | SHIFT(Cc_SEPARATOR));
-}
+}
inline bool IsZerowidth(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cf_FORMAT) | SHIFT(Cf_JOIN) | SHIFT(Cf_BIDI) | SHIFT(Cf_ZWNBSP) | SHIFT(Zs_ZWSPACE));
-}
+}
inline bool IsLineSep(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Zl_LINE));
-}
+}
inline bool IsParaSep(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Zp_PARAGRAPH));
-}
+}
inline bool IsDash(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Pd_DASH) | SHIFT(Pd_HYPHEN) | SHIFT(Sm_MINUS));
-}
+}
inline bool IsHyphen(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Pd_HYPHEN) | SHIFT(Po_HYPHEN));
-}
+}
inline bool IsQuotation(wchar32 ch) {
return NUnicode::CharHasType(ch,
SHIFT(Po_QUOTE) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) | SHIFT(Pi_QUOTE) |
SHIFT(Pf_QUOTE) | SHIFT(Po_SINGLE_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) |
SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE));
-}
+}
inline bool IsSingleQuotation(wchar32 ch) {
return NUnicode::CharHasType(ch,
@@ -209,100 +209,100 @@ inline bool IsSingleQuotation(wchar32 ch) {
inline bool IsTerminal(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Po_TERMINAL));
-}
+}
inline bool IsPairedPunct(wchar32 ch) {
return NUnicode::CharHasType(ch,
SHIFT(Ps_START) | SHIFT(Pe_END) | SHIFT(Ps_QUOTE) | SHIFT(Pe_QUOTE) |
SHIFT(Pi_QUOTE) | SHIFT(Pf_QUOTE) | SHIFT(Ps_SINGLE_QUOTE) |
SHIFT(Pe_SINGLE_QUOTE) | SHIFT(Pi_SINGLE_QUOTE) | SHIFT(Pf_SINGLE_QUOTE));
-}
+}
inline bool IsLeftPunct(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Ps_START) | SHIFT(Ps_QUOTE) | SHIFT(Ps_SINGLE_QUOTE));
-}
+}
inline bool IsRightPunct(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Pe_END) | SHIFT(Pe_QUOTE) | SHIFT(Pe_SINGLE_QUOTE));
-}
+}
inline bool IsCombining(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Mc_SPACING) | SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING));
-}
+}
inline bool IsNonspacing(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Mn_NONSPACING) | SHIFT(Me_ENCLOSING));
-}
+}
inline bool IsAlphabetic(wchar32 ch) {
return NUnicode::CharHasType(ch,
SHIFT(Lu_UPPER) | SHIFT(Ll_LOWER) | SHIFT(Lt_TITLE) | SHIFT(Lm_EXTENDER) | SHIFT(Lm_LETTER) | SHIFT(Lo_OTHER) | SHIFT(Nl_LETTER));
-}
+}
inline bool IsIdeographic(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_IDEOGRAPH) | SHIFT(Nl_IDEOGRAPH));
-}
+}
inline bool IsKatakana(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_KATAKANA));
-}
+}
inline bool IsHiragana(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_HIRAGANA));
-}
+}
inline bool IsHangulLeading(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_LEADING));
-}
+}
inline bool IsHangulVowel(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_VOWEL));
-}
+}
inline bool IsHangulTrailing(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Lo_TRAILING));
-}
+}
inline bool IsHexdigit(wchar32 ch) {
return NUnicode::NPrivate::CharInfo(ch) & IS_ASCII_XDIGIT;
-}
+}
inline bool IsDecdigit(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT));
-}
+}
inline bool IsNumeric(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Nd_DIGIT) | SHIFT(Nl_LETTER) | SHIFT(Nl_IDEOGRAPH) | SHIFT(No_OTHER));
-}
+}
inline bool IsCurrency(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Sc_CURRENCY));
-}
+}
inline bool IsMath(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Sm_MATH));
-}
+}
inline bool IsSymbol(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Sm_MATH) | SHIFT(Sm_MINUS) | SHIFT(Sc_CURRENCY) | SHIFT(Sk_MODIFIER) | SHIFT(So_OTHER));
-}
+}
inline bool IsLowSurrogate(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cs_LOW));
-}
+}
inline bool IsHighSurrogate(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH));
-}
+}
inline bool IsNonbreak(wchar32 ch) {
return NUnicode::NPrivate::CharInfo(ch) & IS_NONBREAK;
-}
+}
inline bool IsPrivate(wchar32 ch) {
return (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE) && !NUnicode::CharHasType(ch, SHIFT(Cs_HIGH));
-}
+}
inline bool IsUnassigned(wchar32 ch) {
return (NUnicode::CharType(ch) == 0) && !(NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE);
-}
+}
inline bool IsPrivateHighSurrogate(wchar32 ch) {
return NUnicode::CharHasType(ch, SHIFT(Cs_HIGH)) && (NUnicode::NPrivate::CharInfo(ch) & IS_PRIVATE);
-}
+}
-// transformations
+// transformations
inline wchar32 ToLower(wchar32 ch) {
return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Lower);
-}
+}
inline wchar32 ToUpper(wchar32 ch) {
return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Upper);
-}
+}
inline wchar32 ToTitle(wchar32 ch) {
return static_cast<wchar32>(ch + NUnicode::NPrivate::CharProperty(ch).Title);
-}
+}
inline int ToDigit(wchar32 ch) {
ui32 i = NUnicode::NPrivate::CharInfo(ch);
return (i & IS_DIGIT) ? static_cast<int>(i >> SVAL_OFFSET) : -1;
-}
+}
// BIDI properties