aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/langs/scripts.h
diff options
context:
space:
mode:
authorqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
committerqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
commit22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
treebffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/langs/scripts.h
parent332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
downloadydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'library/cpp/langs/scripts.h')
-rw-r--r--library/cpp/langs/scripts.h56
1 files changed, 56 insertions, 0 deletions
diff --git a/library/cpp/langs/scripts.h b/library/cpp/langs/scripts.h
new file mode 100644
index 00000000000..4c47a33d2cb
--- /dev/null
+++ b/library/cpp/langs/scripts.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+// Writing systems, a.k.a. scripts.
+// Only SCRIPT_UNKNOWN has an explicit value (0); every later enumerator is one greater than the one before it.
+enum EScript {
+ SCRIPT_UNKNOWN = 0, // explicitly zero
+ SCRIPT_LATIN,
+ SCRIPT_CYRILLIC,
+
+ SCRIPT_GREEK,
+ SCRIPT_ARABIC,
+ SCRIPT_HEBREW,
+ SCRIPT_ARMENIAN,
+ SCRIPT_GEORGIAN,
+
+ SCRIPT_HAN,
+ SCRIPT_KATAKANA,
+ SCRIPT_HIRAGANA,
+ SCRIPT_HANGUL,
+
+ SCRIPT_DEVANAGARI,
+ SCRIPT_BENGALI,
+ SCRIPT_GUJARATI,
+ SCRIPT_GURMUKHI,
+ SCRIPT_KANNADA,
+ SCRIPT_MALAYALAM,
+ SCRIPT_ORIYA,
+ SCRIPT_TAMIL,
+ SCRIPT_TELUGU,
+ SCRIPT_THAANA,
+ SCRIPT_SINHALA,
+
+ SCRIPT_MYANMAR,
+ SCRIPT_THAI,
+ SCRIPT_LAO,
+ SCRIPT_KHMER,
+ SCRIPT_TIBETAN,
+ SCRIPT_MONGOLIAN,
+
+ SCRIPT_ETHIOPIC,
+ SCRIPT_RUNIC,
+ SCRIPT_COPTIC,
+ SCRIPT_SYRIAC,
+
+ SCRIPT_OTHER, // NOTE(review): presumably a catch-all for scripts not listed above -- confirm
+ SCRIPT_MAX // last enumerator, so its value equals the number of enumerators before it; presumably a bound rather than a real script -- confirm
+};
+
+// Conversions between script names and EScript values, according to ISO 15924 codes. See https://en.wikipedia.org/wiki/ISO_15924
+// NOTE(review): the definitions are not in this header; confirm there how unrecognized names and out-of-range values are handled.
+EScript ScriptByName(const TStringBuf& name); // takes a script name, returns an EScript
+EScript ScriptByNameOrDie(const TStringBuf& name); // NOTE(review): presumably fails hard on an unrecognized name instead of returning a fallback -- confirm
+const char* IsoNameByScript(EScript script); // NOTE(review): presumably the ISO 15924 code of the script -- confirm
+const char* FullNameByScript(EScript script); // NOTE(review): presumably the full script name; lifetime/ownership of the returned pointer is not shown here -- confirm