aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoramnosov <amnosov@yandex-team.com>2022-10-26 11:59:40 +0300
committeramnosov <amnosov@yandex-team.com>2022-10-26 11:59:40 +0300
commit4225eab76862f099d4d55a0205ab0cdd39c0433c (patch)
tree842ff268488999a8f54243cfb10ba96fb333645b
parent2399206380b6eab57bb7b9ad0bf0ecf851c94c1d (diff)
downloadydb-4225eab76862f099d4d55a0205ab0cdd39c0433c.tar.gz
Unicode::Is{Category}
Unicode::Is{Category} udfs added
-rw-r--r--library/cpp/unicode/CMakeLists.txt1
-rw-r--r--library/cpp/unicode/set/CMakeLists.txt33
-rw-r--r--library/cpp/unicode/set/category_ranges.h18
-rw-r--r--library/cpp/unicode/set/generated/category_ranges.cpp293
-rw-r--r--library/cpp/unicode/set/quoted_pair.cpp53
-rw-r--r--library/cpp/unicode/set/quoted_pair.h15
-rw-r--r--library/cpp/unicode/set/set.cpp6
-rw-r--r--library/cpp/unicode/set/set.h4
-rw-r--r--library/cpp/unicode/set/unicode_set.cpp480
-rw-r--r--library/cpp/unicode/set/unicode_set.h154
-rw-r--r--library/cpp/unicode/set/unicode_set_lexer.h49
-rw-r--r--library/cpp/unicode/set/unicode_set_lexer.rl6125
-rw-r--r--library/cpp/unicode/set/unicode_set_parser.cpp109
-rw-r--r--library/cpp/unicode/set/unicode_set_parser.h11
-rw-r--r--library/cpp/unicode/set/unicode_set_token.cpp1
-rw-r--r--library/cpp/unicode/set/unicode_set_token.h68
-rw-r--r--ydb/library/yql/udfs/common/unicode_base/lib/CMakeLists.txt1
-rw-r--r--ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h61
18 files changed, 1481 insertions, 1 deletions
diff --git a/library/cpp/unicode/CMakeLists.txt b/library/cpp/unicode/CMakeLists.txt
index 915ed345c1..0b54d5d98d 100644
--- a/library/cpp/unicode/CMakeLists.txt
+++ b/library/cpp/unicode/CMakeLists.txt
@@ -8,3 +8,4 @@
add_subdirectory(normalization)
add_subdirectory(punycode)
+add_subdirectory(set)
diff --git a/library/cpp/unicode/set/CMakeLists.txt b/library/cpp/unicode/set/CMakeLists.txt
new file mode 100644
index 0000000000..44380308ed
--- /dev/null
+++ b/library/cpp/unicode/set/CMakeLists.txt
@@ -0,0 +1,33 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_library(cpp-unicode-set) # Declares the library target; its sources are attached by target_sources() below.
+target_link_libraries(cpp-unicode-set PUBLIC # PUBLIC: these dependencies also propagate to targets that link cpp-unicode-set.
+ contrib-libs-cxxsupp
+ yutil
+ tools-enum_parser-enum_serialization_runtime
+)
+target_sources(cpp-unicode-set PRIVATE # Sources compiled into this target only, including the checked-in generated/category_ranges.cpp.
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/quoted_pair.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_parser.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_token.cpp
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/generated/category_ranges.cpp
+)
+generate_enum_serilization(cpp-unicode-set # Project-defined macro (name spelled as in the build system); presumably generates enum (de)serialization code for unicode_set_token.h - confirm.
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_token.h
+ INCLUDE_HEADERS
+ library/cpp/unicode/set/unicode_set_token.h
+)
+target_ragel_lexers(cpp-unicode-set # Project-defined macro; presumably runs Ragel on the .rl6 lexer and adds the output to the target - confirm.
+ PRIVATE
+ ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_lexer.rl6
+ -CG2
+)
diff --git a/library/cpp/unicode/set/category_ranges.h b/library/cpp/unicode/set/category_ranges.h
new file mode 100644
index 0000000000..10256d2e6e
--- /dev/null
+++ b/library/cpp/unicode/set/category_ranges.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <util/charset/unidata.h>
+#include <util/system/defaults.h>
+#include <util/generic/strbuf.h>
+
+namespace NUnicode {
+ namespace NPrivate {
+ struct TCategoryRanges { // Plain aggregate pairing a size with a pointer to read-only wchar32 data; no constructor or destructor is declared.
+ size_t Count; // Size associated with Data. NOTE(review): whether this counts array elements or range pairs is not visible here - confirm.
+ const wchar32* Data; // Pointer to const code point values; this struct does not free it. Presumably points at a static table such as CAT_C in generated/category_ranges.cpp - verify.
+ };
+
+ const TCategoryRanges& GetCategoryRanges(WC_TYPE cat); // Returns the ranges selected by a WC_TYPE value; returned by const reference, so the object presumably outlives the call - confirm.
+ const TCategoryRanges& GetCategoryRanges(const TStringBuf& category); // Overload selecting the ranges by a string; accepted spellings and behavior for unknown names are not visible here - TODO confirm.
+
+ }
+}
diff --git a/library/cpp/unicode/set/generated/category_ranges.cpp b/library/cpp/unicode/set/generated/category_ranges.cpp
new file mode 100644
index 0000000000..44e2430239
--- /dev/null
+++ b/library/cpp/unicode/set/generated/category_ranges.cpp
@@ -0,0 +1,293 @@
+#include <library/cpp/unicode/set/category_ranges.h>
+
+#include <util/generic/hash.h>
+#include <util/generic/singleton.h>
+#include <util/generic/yexception.h>
+#include <utility>
+
+namespace NUnicode {
+namespace NPrivate {
+
+static const wchar32 CAT_C[] = {0, 32, 127, 160, 173, 174, 888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1542, 1564, 1566, 1757, 1758, 1806, 1808, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2274, 2275, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 
3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6158, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8204, 8208, 8234, 8239, 8288, 8304, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 
43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65532, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69821, 69822, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 
70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113824, 118784, 119030, 119040, 119079, 119081, 119155, 119163, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 
126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917760, 918000, 1114112};
+static const wchar32 CAT_Cc[] = {0, 32, 127, 160, 1114112}; // NOTE(review): values ascend and end in 1114112 (0x110000, one past the last code point); presumably [start, end) boundary pairs plus a terminator, e.g. [0, 32) and [127, 160) - confirm against the consumer
+static const wchar32 CAT_Cc_ASCII[] = {0, 9, 14, 28, 127, 160, 1114112}; // read as [start, end) pairs, this table plus CAT_Cc_SEPARATOR and CAT_Cc_SPACE covers the same spans as CAT_Cc
+static const wchar32 CAT_Cc_SEPARATOR[] = {28, 32, 1114112};
+static const wchar32 CAT_Cc_SPACE[] = {9, 14, 1114112};
+static const wchar32 CAT_Cf[] = {173, 174, 1536, 1542, 1564, 1565, 1757, 1758, 1807, 1808, 2274, 2275, 6158, 6159, 8204, 8208, 8234, 8239, 8288, 8293, 8294, 8304, 65279, 65280, 65529, 65532, 69821, 69822, 113824, 113828, 119155, 119163, 917505, 917506, 917536, 917632, 1114112}; // NOTE(review): this file lives under generated/, so lasting documentation presumably belongs in the generator - confirm
+static const wchar32 CAT_Cf_BIDI[] = {8206, 8208, 8234, 8239, 8294, 8296, 8297, 8298, 917544, 917546, 917595, 917596, 917597, 917598, 917627, 917628, 917629, 917630, 1114112};
+static const wchar32 CAT_Cf_FORMAT[] = {173, 174, 1536, 1542, 1564, 1565, 1757, 1758, 1807, 1808, 2274, 2275, 6158, 6159, 8289, 8293, 8296, 8297, 8298, 8304, 65529, 65532, 69821, 69822, 113824, 113828, 119155, 119163, 917505, 917506, 917536, 917544, 917546, 917595, 917596, 917597, 917598, 917627, 917628, 917629, 917630, 917632, 1114112};
+static const wchar32 CAT_Cf_JOIN[] = {8204, 8206, 8288, 8289, 1114112};
+static const wchar32 CAT_Cf_ZWNBSP[] = {65279, 65280, 1114112};
+static const wchar32 CAT_Cn[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 
3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 
43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 
70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 
126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112};
+static const wchar32 CAT_Cn_UNASSIGNED[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 
3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 
43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 
70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 
126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112};
+static const wchar32 CAT_Co[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 
3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 
43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 
70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 
126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112};
+static const wchar32 CAT_Co_PRIVATE[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 
3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 
43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 
70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 
126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112};
+static const wchar32 CAT_Cs[] = {55296, 57344, 1114112};
+static const wchar32 CAT_Cs_HIGH[] = {55296, 56320, 1114112};
+static const wchar32 CAT_Cs_LOW[] = {56320, 57344, 1114112};
+static const wchar32 CAT_L[] = {65, 91, 97, 123, 170, 171, 181, 182, 186, 187, 192, 215, 216, 247, 248, 706, 710, 722, 736, 741, 748, 749, 750, 751, 880, 885, 886, 888, 890, 894, 895, 896, 902, 903, 904, 907, 908, 909, 910, 930, 931, 1014, 1015, 1154, 1162, 1328, 1329, 1367, 1369, 1370, 1377, 1416, 1488, 1515, 1520, 1523, 1568, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1765, 1767, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2036, 2038, 2042, 2043, 2048, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2417, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3655, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3782, 3783, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 
4209, 4213, 4226, 4238, 4239, 4256, 4294, 4295, 4296, 4301, 4302, 4304, 4347, 4348, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5024, 5110, 5112, 5118, 5121, 5741, 5743, 5760, 5761, 5787, 5792, 5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6103, 6104, 6108, 6109, 6176, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6823, 6824, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7294, 7296, 7305, 7401, 7405, 7406, 7410, 7413, 7415, 7424, 7616, 7680, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8125, 8126, 8127, 8130, 8133, 8134, 8141, 8144, 8148, 8150, 8156, 8160, 8173, 8178, 8181, 8182, 8189, 8305, 8306, 8319, 8320, 8336, 8349, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8512, 8517, 8522, 8526, 8527, 8579, 8581, 11264, 11311, 11312, 11359, 11360, 11493, 11499, 11503, 11506, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11632, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11823, 11824, 12293, 12295, 12337, 12342, 12347, 12349, 12353, 12439, 12445, 12448, 12449, 12539, 12540, 12544, 12549, 12590, 12593, 12687, 12704, 12731, 12784, 12800, 13312, 19894, 19968, 40918, 40960, 42125, 42192, 42238, 42240, 42509, 42512, 42528, 42538, 42540, 42560, 42607, 42623, 42654, 42656, 42726, 42775, 42784, 42786, 42889, 42891, 42927, 42928, 42936, 42999, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 
43335, 43360, 43389, 43396, 43443, 43471, 43472, 43488, 43493, 43494, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43742, 43744, 43755, 43762, 43765, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43867, 43868, 43878, 43888, 44003, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65313, 65339, 65345, 65371, 65382, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66560, 66718, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 68736, 68787, 68800, 68851, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 
71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71840, 71904, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 92992, 92996, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 94099, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 120780, 124928, 125125, 125184, 125252, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112};
+static const wchar32 CAT_Ll[] = {97, 123, 181, 182, 223, 247, 248, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 443, 445, 448, 454, 455, 457, 458, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 499, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 660, 661, 688, 881, 882, 883, 884, 887, 888, 891, 894, 912, 913, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1014, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 
1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1377, 1416, 5112, 5118, 7296, 7305, 7424, 7468, 7531, 7544, 7545, 7579, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 7852, 
7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7958, 7968, 7976, 7984, 7992, 8000, 8006, 8016, 8024, 8032, 8040, 8048, 8062, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8117, 8118, 8120, 8126, 8127, 8130, 8133, 8134, 8136, 8144, 8148, 8150, 8152, 8160, 8168, 8178, 8181, 8182, 8184, 8458, 8459, 8462, 8464, 8467, 8468, 8495, 8496, 8500, 8501, 8505, 8506, 8508, 8510, 8518, 8522, 8526, 8527, 8580, 8581, 11312, 11359, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11388, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11493, 11500, 11501, 11502, 11503, 11507, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 
42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42606, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42652, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42864, 42865, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42888, 42892, 42893, 42894, 42895, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42933, 42934, 42935, 42936, 43002, 43003, 43824, 43867, 43872, 43878, 43888, 43968, 64256, 64263, 64275, 64280, 65345, 65371, 66600, 66640, 66776, 66812, 68800, 68851, 71872, 71904, 119834, 119860, 119886, 119893, 119894, 119912, 119938, 119964, 119990, 119994, 119995, 119996, 119997, 120004, 120005, 120016, 120042, 120068, 120094, 120120, 120146, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120486, 120514, 120539, 120540, 120546, 120572, 120597, 120598, 120604, 120630, 120655, 120656, 120662, 120688, 120713, 120714, 120720, 120746, 120771, 120772, 120778, 120779, 120780, 125218, 125252, 1114112};
+static const wchar32 CAT_Ll_LOWER[] = {97, 123, 181, 182, 223, 247, 248, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 443, 445, 448, 454, 455, 457, 458, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 499, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 660, 661, 688, 881, 882, 883, 884, 887, 888, 891, 894, 912, 913, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1014, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1163, 1164, 1165, 1166, 1167, 1168, 
1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1377, 1416, 5112, 5118, 7296, 7305, 7424, 7468, 7531, 7544, 7545, 7579, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 
7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7958, 7968, 7976, 7984, 7992, 8000, 8006, 8016, 8024, 8032, 8040, 8048, 8062, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8117, 8118, 8120, 8126, 8127, 8130, 8133, 8134, 8136, 8144, 8148, 8150, 8152, 8160, 8168, 8178, 8181, 8182, 8184, 8458, 8459, 8462, 8464, 8467, 8468, 8495, 8496, 8500, 8501, 8505, 8506, 8508, 8510, 8518, 8522, 8526, 8527, 8580, 8581, 11312, 11359, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11388, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11493, 11500, 11501, 11502, 11503, 11507, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 
42592, 42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42606, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42652, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42864, 42865, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42888, 42892, 42893, 42894, 42895, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42933, 42934, 42935, 42936, 43002, 43003, 43824, 43867, 43872, 43878, 43888, 43968, 64256, 64263, 64275, 64280, 65345, 65371, 66600, 66640, 66776, 66812, 68800, 68851, 71872, 71904, 119834, 119860, 119886, 119893, 119894, 119912, 119938, 119964, 119990, 119994, 119995, 119996, 119997, 120004, 120005, 120016, 120042, 120068, 120094, 120120, 120146, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120486, 120514, 120539, 120540, 120546, 120572, 120597, 120598, 120604, 120630, 120655, 120656, 120662, 120688, 120713, 120714, 120720, 120746, 120771, 120772, 120778, 120779, 120780, 125218, 125252, 1114112};
+static const wchar32 CAT_Lm[] = {688, 706, 710, 722, 736, 741, 748, 749, 750, 751, 884, 885, 890, 891, 1369, 1370, 1600, 1601, 1765, 1767, 2036, 2038, 2042, 2043, 2074, 2075, 2084, 2085, 2088, 2089, 2417, 2418, 3654, 3655, 3782, 3783, 4348, 4349, 6103, 6104, 6211, 6212, 6823, 6824, 7288, 7294, 7468, 7531, 7544, 7545, 7579, 7616, 8305, 8306, 8319, 8320, 8336, 8349, 11388, 11390, 11631, 11632, 11823, 11824, 12293, 12294, 12337, 12342, 12347, 12348, 12445, 12447, 12540, 12543, 40981, 40982, 42232, 42238, 42508, 42509, 42623, 42624, 42652, 42654, 42775, 42784, 42864, 42865, 42888, 42889, 43000, 43002, 43471, 43472, 43494, 43495, 43632, 43633, 43741, 43742, 43763, 43765, 43868, 43872, 65392, 65393, 65438, 65440, 92992, 92996, 94099, 94112, 94176, 94177, 1114112};
+static const wchar32 CAT_Lm_EXTENDER[] = {711, 712, 720, 722, 884, 885, 890, 891, 1600, 1601, 2036, 2038, 2042, 2043, 2417, 2418, 3654, 3655, 3782, 3783, 6103, 6104, 6823, 6824, 7288, 7294, 11823, 11824, 12293, 12294, 12337, 12342, 12347, 12348, 12445, 12447, 12540, 12543, 40981, 40982, 42508, 42509, 42623, 42624, 43471, 43472, 43741, 43742, 43763, 43765, 65392, 65393, 65438, 65440, 92992, 92996, 94176, 94177, 1114112};
+static const wchar32 CAT_Lm_LETTER[] = {688, 706, 710, 711, 712, 720, 736, 741, 748, 749, 750, 751, 1369, 1370, 1765, 1767, 2074, 2075, 2084, 2085, 2088, 2089, 4348, 4349, 6211, 6212, 7468, 7531, 7544, 7545, 7579, 7616, 8305, 8306, 8319, 8320, 8336, 8349, 11388, 11390, 11631, 11632, 42232, 42238, 42652, 42654, 42775, 42784, 42864, 42865, 42888, 42889, 43000, 43002, 43494, 43495, 43632, 43633, 43868, 43872, 94099, 94112, 1114112};
+static const wchar32 CAT_Lo[] = {170, 171, 186, 187, 443, 444, 448, 452, 660, 661, 1488, 1515, 1520, 1523, 1568, 1600, 1601, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2048, 2070, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2418, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3654, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4304, 4347, 4349, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5121, 5741, 5743, 5760, 5761, 5787, 5792, 
5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6108, 6109, 6176, 6211, 6212, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7288, 7401, 7405, 7406, 7410, 7413, 7415, 8501, 8505, 11568, 11624, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 12294, 12295, 12348, 12349, 12353, 12439, 12447, 12448, 12449, 12539, 12543, 12544, 12549, 12590, 12593, 12687, 12704, 12731, 12784, 12800, 13312, 19894, 19968, 40918, 40960, 40981, 40982, 42125, 42192, 42232, 42240, 42508, 42512, 42528, 42538, 42540, 42606, 42607, 42656, 42726, 42895, 42896, 42999, 43000, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 43335, 43360, 43389, 43396, 43443, 43488, 43493, 43495, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43632, 43633, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43741, 43744, 43755, 43762, 43763, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43968, 44003, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65382, 65392, 65393, 65438, 65440, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66640, 66718, 66816, 66856, 
66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 124928, 125125, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112};
+static const wchar32 CAT_Lo_HIRAGANA[] = {12353, 12439, 12447, 12448, 110593, 110594, 1114112};
+static const wchar32 CAT_Lo_IDEOGRAPH[] = {12294, 12295, 13312, 19894, 19968, 40918, 63744, 64110, 64112, 64218, 94208, 100333, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112};
+static const wchar32 CAT_Lo_KATAKANA[] = {12449, 12539, 12543, 12544, 12784, 12800, 65382, 65392, 65393, 65438, 110592, 110593, 1114112};
+static const wchar32 CAT_Lo_LEADING[] = {4352, 4448, 43360, 43389, 1114112};
+static const wchar32 CAT_Lo_OTHER[] = {170, 171, 186, 187, 443, 444, 448, 452, 660, 661, 1488, 1515, 1520, 1523, 1568, 1600, 1601, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2048, 2070, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2418, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3654, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4304, 4347, 4349, 4352, 4608, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5121, 5741, 5743, 5760, 
5761, 5787, 5792, 5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6108, 6109, 6176, 6211, 6212, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7288, 7401, 7405, 7406, 7410, 7413, 7415, 8501, 8505, 11568, 11624, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 12348, 12349, 12549, 12590, 12593, 12687, 12704, 12731, 40960, 40981, 40982, 42125, 42192, 42232, 42240, 42508, 42512, 42528, 42538, 42540, 42606, 42607, 42656, 42726, 42895, 42896, 42999, 43000, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 43335, 43396, 43443, 43488, 43493, 43495, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43632, 43633, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43741, 43744, 43755, 43762, 43763, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43968, 44003, 44032, 55204, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65440, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66640, 66718, 66816, 66856, 66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 
67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 100352, 101107, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 124928, 125125, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 1114112};
+static const wchar32 CAT_Lo_TRAILING[] = {4520, 4608, 55243, 55292, 1114112};
+static const wchar32 CAT_Lo_VOWEL[] = {4448, 4520, 55216, 55239, 1114112};
+static const wchar32 CAT_Lt[] = {453, 454, 456, 457, 459, 460, 498, 499, 8072, 8080, 8088, 8096, 8104, 8112, 8124, 8125, 8140, 8141, 8188, 8189, 1114112};
+static const wchar32 CAT_Lt_TITLE[] = {453, 454, 456, 457, 459, 460, 498, 499, 8072, 8080, 8088, 8096, 8104, 8112, 8124, 8125, 8140, 8141, 8188, 8189, 1114112};
+static const wchar32 CAT_Lu[] = {65, 91, 192, 215, 216, 223, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 444, 445, 452, 453, 455, 456, 458, 459, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 498, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 880, 881, 882, 883, 886, 887, 895, 896, 902, 903, 904, 907, 908, 909, 910, 912, 913, 930, 931, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1015, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1162, 1163, 1164, 1165, 
1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1329, 1367, 4256, 4294, 4295, 4296, 4301, 4302, 5024, 5110, 7680, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 
7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7960, 7966, 7976, 7984, 7992, 8000, 8008, 8014, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8032, 8040, 8048, 8120, 8124, 8136, 8140, 8152, 8156, 8168, 8173, 8184, 8188, 8450, 8451, 8455, 8456, 8459, 8462, 8464, 8467, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8496, 8500, 8510, 8512, 8517, 8518, 8579, 8580, 11264, 11311, 11360, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11390, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11499, 11500, 11501, 11502, 11506, 11507, 42560, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 42598, 
42599, 42600, 42601, 42602, 42603, 42604, 42605, 42624, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42786, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42891, 42892, 42893, 42894, 42896, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42927, 42928, 42933, 42934, 42935, 65313, 65339, 66560, 66600, 66736, 66772, 68736, 68787, 71840, 71872, 119808, 119834, 119860, 119886, 119912, 119938, 119964, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119990, 120016, 120042, 120068, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120120, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120488, 120513, 120546, 120571, 120604, 120629, 120662, 120687, 120720, 120745, 120778, 120779, 125184, 125218, 1114112};
+static const wchar32 CAT_Lu_UPPER[] = {65, 91, 192, 215, 216, 223, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 444, 445, 452, 453, 455, 456, 458, 459, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 498, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 880, 881, 882, 883, 886, 887, 895, 896, 902, 903, 904, 907, 908, 909, 910, 912, 913, 930, 931, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1015, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1162, 1163, 1164, 
1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1329, 1367, 4256, 4294, 4295, 4296, 4301, 4302, 5024, 5110, 7680, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 
7850, 7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7960, 7966, 7976, 7984, 7992, 8000, 8008, 8014, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8032, 8040, 8048, 8120, 8124, 8136, 8140, 8152, 8156, 8168, 8173, 8184, 8188, 8450, 8451, 8455, 8456, 8459, 8462, 8464, 8467, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8496, 8500, 8510, 8512, 8517, 8518, 8579, 8580, 11264, 11311, 11360, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11390, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11499, 11500, 11501, 11502, 11506, 11507, 42560, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 
42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42624, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42786, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42891, 42892, 42893, 42894, 42896, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42927, 42928, 42933, 42934, 42935, 65313, 65339, 66560, 66600, 66736, 66772, 68736, 68787, 71840, 71872, 119808, 119834, 119860, 119886, 119912, 119938, 119964, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119990, 120016, 120042, 120068, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120120, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120488, 120513, 120546, 120571, 120604, 120629, 120662, 120687, 120720, 120745, 120778, 120779, 125184, 125218, 1114112};
+static const wchar32 CAT_M[] = {768, 880, 1155, 1162, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2404, 2433, 2436, 2492, 2493, 2494, 2501, 2503, 2505, 2507, 2510, 2519, 2520, 2530, 2532, 2561, 2564, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2692, 2748, 2749, 2750, 2758, 2759, 2762, 2763, 2766, 2786, 2788, 2817, 2820, 2876, 2877, 2878, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2914, 2916, 2946, 2947, 3006, 3011, 3014, 3017, 3018, 3022, 3031, 3032, 3072, 3076, 3134, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3204, 3260, 3261, 3262, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3298, 3300, 3329, 3332, 3390, 3397, 3398, 3401, 3402, 3406, 3415, 3416, 3426, 3428, 3458, 3460, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3570, 3572, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3902, 3904, 3953, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4139, 4159, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4240, 4250, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6444, 6448, 6460, 6679, 6684, 6741, 6751, 6752, 6781, 6783, 6784, 6832, 6847, 6912, 6917, 6964, 6981, 7019, 7028, 7040, 7043, 7073, 7086, 7142, 7156, 7204, 7224, 7376, 7379, 7380, 7401, 7405, 7406, 7410, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8433, 11503, 11506, 11647, 11648, 11744, 11776, 12330, 12336, 12441, 12443, 42607, 42611, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43048, 43136, 43138, 43188, 43206, 43232, 43250, 
43302, 43310, 43335, 43348, 43392, 43396, 43443, 43457, 43493, 43494, 43561, 43575, 43587, 43588, 43596, 43598, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43755, 43760, 43765, 43767, 44003, 44011, 44012, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69632, 69635, 69688, 69703, 69759, 69763, 69808, 69819, 69888, 69891, 69927, 69941, 70003, 70004, 70016, 70019, 70067, 70081, 70090, 70093, 70188, 70200, 70206, 70207, 70367, 70379, 70400, 70404, 70460, 70461, 70462, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70502, 70509, 70512, 70517, 70709, 70727, 70832, 70852, 71087, 71094, 71096, 71105, 71132, 71134, 71216, 71233, 71339, 71352, 71453, 71468, 72751, 72759, 72760, 72768, 72850, 72872, 72873, 72887, 92912, 92917, 92976, 92983, 94033, 94079, 94095, 94099, 113821, 113823, 119141, 119146, 119149, 119155, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112};
+static const wchar32 CAT_Mc[] = {2307, 2308, 2363, 2364, 2366, 2369, 2377, 2381, 2382, 2384, 2434, 2436, 2494, 2497, 2503, 2505, 2507, 2509, 2519, 2520, 2563, 2564, 2622, 2625, 2691, 2692, 2750, 2753, 2761, 2762, 2763, 2765, 2818, 2820, 2878, 2879, 2880, 2881, 2887, 2889, 2891, 2893, 2903, 2904, 3006, 3008, 3009, 3011, 3014, 3017, 3018, 3021, 3031, 3032, 3073, 3076, 3137, 3141, 3202, 3204, 3262, 3263, 3264, 3269, 3271, 3273, 3274, 3276, 3285, 3287, 3330, 3332, 3390, 3393, 3398, 3401, 3402, 3405, 3415, 3416, 3458, 3460, 3535, 3538, 3544, 3552, 3570, 3572, 3902, 3904, 3967, 3968, 4139, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4182, 4184, 4194, 4197, 4199, 4206, 4227, 4229, 4231, 4237, 4239, 4240, 4250, 4253, 6070, 6071, 6078, 6086, 6087, 6089, 6435, 6439, 6441, 6444, 6448, 6450, 6451, 6457, 6681, 6683, 6741, 6742, 6743, 6744, 6753, 6754, 6755, 6757, 6765, 6771, 6916, 6917, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 6981, 7042, 7043, 7073, 7074, 7078, 7080, 7082, 7083, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7156, 7204, 7212, 7220, 7222, 7393, 7394, 7410, 7412, 12334, 12336, 43043, 43045, 43047, 43048, 43136, 43138, 43188, 43204, 43346, 43348, 43395, 43396, 43444, 43446, 43450, 43452, 43453, 43457, 43567, 43569, 43571, 43573, 43597, 43598, 43643, 43644, 43645, 43646, 43755, 43756, 43758, 43760, 43765, 43766, 44003, 44005, 44006, 44008, 44009, 44011, 44012, 44013, 69632, 69633, 69634, 69635, 69762, 69763, 69808, 69811, 69815, 69817, 69932, 69933, 70018, 70019, 70067, 70070, 70079, 70081, 70188, 70191, 70194, 70196, 70197, 70198, 70368, 70371, 70402, 70404, 70462, 70464, 70465, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70709, 70712, 70720, 70722, 70725, 70726, 70832, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 71087, 71090, 71096, 71100, 71102, 71103, 71216, 71219, 71227, 71229, 71230, 71231, 71340, 71341, 71342, 71344, 71350, 71351, 71456, 71458, 71462, 71463, 72751, 72752, 72766, 72767, 72873, 72874, 72881, 72882, 72884, 72885, 94033, 
94079, 119141, 119143, 119149, 119155, 1114112};
+static const wchar32 CAT_Mc_SPACING[] = {2307, 2308, 2363, 2364, 2366, 2369, 2377, 2381, 2382, 2384, 2434, 2436, 2494, 2497, 2503, 2505, 2507, 2509, 2519, 2520, 2563, 2564, 2622, 2625, 2691, 2692, 2750, 2753, 2761, 2762, 2763, 2765, 2818, 2820, 2878, 2879, 2880, 2881, 2887, 2889, 2891, 2893, 2903, 2904, 3006, 3008, 3009, 3011, 3014, 3017, 3018, 3021, 3031, 3032, 3073, 3076, 3137, 3141, 3202, 3204, 3262, 3263, 3264, 3269, 3271, 3273, 3274, 3276, 3285, 3287, 3330, 3332, 3390, 3393, 3398, 3401, 3402, 3405, 3415, 3416, 3458, 3460, 3535, 3538, 3544, 3552, 3570, 3572, 3902, 3904, 3967, 3968, 4139, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4182, 4184, 4194, 4197, 4199, 4206, 4227, 4229, 4231, 4237, 4239, 4240, 4250, 4253, 6070, 6071, 6078, 6086, 6087, 6089, 6435, 6439, 6441, 6444, 6448, 6450, 6451, 6457, 6681, 6683, 6741, 6742, 6743, 6744, 6753, 6754, 6755, 6757, 6765, 6771, 6916, 6917, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 6981, 7042, 7043, 7073, 7074, 7078, 7080, 7082, 7083, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7156, 7204, 7212, 7220, 7222, 7393, 7394, 7410, 7412, 12334, 12336, 43043, 43045, 43047, 43048, 43136, 43138, 43188, 43204, 43346, 43348, 43395, 43396, 43444, 43446, 43450, 43452, 43453, 43457, 43567, 43569, 43571, 43573, 43597, 43598, 43643, 43644, 43645, 43646, 43755, 43756, 43758, 43760, 43765, 43766, 44003, 44005, 44006, 44008, 44009, 44011, 44012, 44013, 69632, 69633, 69634, 69635, 69762, 69763, 69808, 69811, 69815, 69817, 69932, 69933, 70018, 70019, 70067, 70070, 70079, 70081, 70188, 70191, 70194, 70196, 70197, 70198, 70368, 70371, 70402, 70404, 70462, 70464, 70465, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70709, 70712, 70720, 70722, 70725, 70726, 70832, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 71087, 71090, 71096, 71100, 71102, 71103, 71216, 71219, 71227, 71229, 71230, 71231, 71340, 71341, 71342, 71344, 71350, 71351, 71456, 71458, 71462, 71463, 72751, 72752, 72766, 72767, 72873, 72874, 72881, 72882, 72884, 72885, 
94033, 94079, 119141, 119143, 119149, 119155, 1114112};
+static const wchar32 CAT_Me[] = {1160, 1162, 6846, 6847, 8413, 8417, 8418, 8421, 42608, 42611, 1114112};
+static const wchar32 CAT_Me_ENCLOSING[] = {1160, 1162, 6846, 6847, 8413, 8417, 8418, 8421, 42608, 42611, 1114112};
+static const wchar32 CAT_Mn[] = {768, 880, 1155, 1160, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2307, 2362, 2363, 2364, 2365, 2369, 2377, 2381, 2382, 2385, 2392, 2402, 2404, 2433, 2434, 2492, 2493, 2497, 2501, 2509, 2510, 2530, 2532, 2561, 2563, 2620, 2621, 2625, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2691, 2748, 2749, 2753, 2758, 2759, 2761, 2765, 2766, 2786, 2788, 2817, 2818, 2876, 2877, 2879, 2880, 2881, 2885, 2893, 2894, 2902, 2903, 2914, 2916, 2946, 2947, 3008, 3009, 3021, 3022, 3072, 3073, 3134, 3137, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3202, 3260, 3261, 3263, 3264, 3270, 3271, 3276, 3278, 3298, 3300, 3329, 3330, 3393, 3397, 3405, 3406, 3426, 3428, 3530, 3531, 3538, 3541, 3542, 3543, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3953, 3967, 3968, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4159, 4184, 4186, 4190, 4193, 4209, 4213, 4226, 4227, 4229, 4231, 4237, 4238, 4253, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6435, 6439, 6441, 6450, 6451, 6457, 6460, 6679, 6681, 6683, 6684, 6742, 6743, 6744, 6751, 6752, 6753, 6754, 6755, 6757, 6765, 6771, 6781, 6783, 6784, 6832, 6846, 6912, 6916, 6964, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 7019, 7028, 7040, 7042, 7074, 7078, 7080, 7082, 7083, 7086, 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7212, 7220, 7222, 7224, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, 7412, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8413, 8417, 8418, 8421, 8433, 11503, 11506, 11647, 11648, 11744, 
11776, 12330, 12334, 12441, 12443, 42607, 42608, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43045, 43047, 43204, 43206, 43232, 43250, 43302, 43310, 43335, 43346, 43392, 43395, 43443, 43444, 43446, 43450, 43452, 43453, 43493, 43494, 43561, 43567, 43569, 43571, 43573, 43575, 43587, 43588, 43596, 43597, 43644, 43645, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43756, 43758, 43766, 43767, 44005, 44006, 44008, 44009, 44013, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69633, 69634, 69688, 69703, 69759, 69762, 69811, 69815, 69817, 69819, 69888, 69891, 69927, 69932, 69933, 69941, 70003, 70004, 70016, 70018, 70070, 70079, 70090, 70093, 70191, 70194, 70196, 70197, 70198, 70200, 70206, 70207, 70367, 70368, 70371, 70379, 70400, 70402, 70460, 70461, 70464, 70465, 70502, 70509, 70512, 70517, 70712, 70720, 70722, 70725, 70726, 70727, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 70852, 71090, 71094, 71100, 71102, 71103, 71105, 71132, 71134, 71219, 71227, 71229, 71230, 71231, 71233, 71339, 71340, 71341, 71342, 71344, 71350, 71351, 71352, 71453, 71456, 71458, 71462, 71463, 71468, 72752, 72759, 72760, 72766, 72767, 72768, 72850, 72872, 72874, 72881, 72882, 72884, 72885, 72887, 92912, 92917, 92976, 92983, 94095, 94099, 113821, 113823, 119143, 119146, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112};
+static const wchar32 CAT_Mn_NONSPACING[] = {768, 880, 1155, 1160, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2307, 2362, 2363, 2364, 2365, 2369, 2377, 2381, 2382, 2385, 2392, 2402, 2404, 2433, 2434, 2492, 2493, 2497, 2501, 2509, 2510, 2530, 2532, 2561, 2563, 2620, 2621, 2625, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2691, 2748, 2749, 2753, 2758, 2759, 2761, 2765, 2766, 2786, 2788, 2817, 2818, 2876, 2877, 2879, 2880, 2881, 2885, 2893, 2894, 2902, 2903, 2914, 2916, 2946, 2947, 3008, 3009, 3021, 3022, 3072, 3073, 3134, 3137, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3202, 3260, 3261, 3263, 3264, 3270, 3271, 3276, 3278, 3298, 3300, 3329, 3330, 3393, 3397, 3405, 3406, 3426, 3428, 3530, 3531, 3538, 3541, 3542, 3543, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3953, 3967, 3968, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4159, 4184, 4186, 4190, 4193, 4209, 4213, 4226, 4227, 4229, 4231, 4237, 4238, 4253, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6435, 6439, 6441, 6450, 6451, 6457, 6460, 6679, 6681, 6683, 6684, 6742, 6743, 6744, 6751, 6752, 6753, 6754, 6755, 6757, 6765, 6771, 6781, 6783, 6784, 6832, 6846, 6912, 6916, 6964, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 7019, 7028, 7040, 7042, 7074, 7078, 7080, 7082, 7083, 7086, 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7212, 7220, 7222, 7224, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, 7412, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8413, 8417, 8418, 8421, 8433, 11503, 11506, 11647, 
11648, 11744, 11776, 12330, 12334, 12441, 12443, 42607, 42608, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43045, 43047, 43204, 43206, 43232, 43250, 43302, 43310, 43335, 43346, 43392, 43395, 43443, 43444, 43446, 43450, 43452, 43453, 43493, 43494, 43561, 43567, 43569, 43571, 43573, 43575, 43587, 43588, 43596, 43597, 43644, 43645, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43756, 43758, 43766, 43767, 44005, 44006, 44008, 44009, 44013, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69633, 69634, 69688, 69703, 69759, 69762, 69811, 69815, 69817, 69819, 69888, 69891, 69927, 69932, 69933, 69941, 70003, 70004, 70016, 70018, 70070, 70079, 70090, 70093, 70191, 70194, 70196, 70197, 70198, 70200, 70206, 70207, 70367, 70368, 70371, 70379, 70400, 70402, 70460, 70461, 70464, 70465, 70502, 70509, 70512, 70517, 70712, 70720, 70722, 70725, 70726, 70727, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 70852, 71090, 71094, 71100, 71102, 71103, 71105, 71132, 71134, 71219, 71227, 71229, 71230, 71231, 71233, 71339, 71340, 71341, 71342, 71344, 71350, 71351, 71352, 71453, 71456, 71458, 71462, 71463, 71468, 72752, 72759, 72760, 72766, 72767, 72768, 72850, 72872, 72874, 72881, 72882, 72884, 72885, 72887, 92912, 92917, 92976, 92983, 94095, 94099, 113821, 113823, 119143, 119146, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112};
+static const wchar32 CAT_N[] = {48, 58, 178, 180, 185, 186, 188, 191, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2548, 2554, 2662, 2672, 2790, 2800, 2918, 2928, 2930, 2936, 3046, 3059, 3174, 3184, 3192, 3199, 3302, 3312, 3416, 3423, 3430, 3449, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3892, 4160, 4170, 4240, 4250, 4969, 4989, 5870, 5873, 6112, 6122, 6128, 6138, 6160, 6170, 6470, 6480, 6608, 6619, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8579, 8581, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12295, 12296, 12321, 12330, 12344, 12347, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 42528, 42538, 42726, 42736, 43056, 43062, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 65799, 65844, 65856, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 66369, 66370, 66378, 66379, 66513, 66518, 66720, 66730, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70113, 70133, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71484, 71904, 71923, 72784, 72813, 74752, 74863, 92768, 92778, 93008, 93018, 93019, 93026, 119648, 119666, 120782, 120832, 125127, 125136, 125264, 125274, 127232, 127245, 1114112};
+static const wchar32 CAT_Nd[] = {48, 58, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2662, 2672, 2790, 2800, 2918, 2928, 3046, 3056, 3174, 3184, 3302, 3312, 3430, 3440, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3882, 4160, 4170, 4240, 4250, 6112, 6122, 6160, 6170, 6470, 6480, 6608, 6618, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 42528, 42538, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 66720, 66730, 69734, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71482, 71904, 71914, 72784, 72794, 92768, 92778, 93008, 93018, 120782, 120832, 125264, 125274, 1114112};
+static const wchar32 CAT_Nd_DIGIT[] = {48, 58, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2662, 2672, 2790, 2800, 2918, 2928, 3046, 3056, 3174, 3184, 3302, 3312, 3430, 3440, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3882, 4160, 4170, 4240, 4250, 6112, 6122, 6160, 6170, 6470, 6480, 6608, 6618, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 42528, 42538, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 66720, 66730, 69734, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71482, 71904, 71914, 72784, 72794, 92768, 92778, 93008, 93018, 120782, 120832, 125264, 125274, 1114112};
+static const wchar32 CAT_Nl[] = {5870, 5873, 8544, 8579, 8581, 8585, 12295, 12296, 12321, 12330, 12344, 12347, 42726, 42736, 65856, 65909, 66369, 66370, 66378, 66379, 66513, 66518, 74752, 74863, 1114112};
+static const wchar32 CAT_Nl_IDEOGRAPH[] = {12295, 12296, 12321, 12330, 12344, 12347, 1114112};
+static const wchar32 CAT_Nl_LETTER[] = {5870, 5873, 8544, 8579, 8581, 8585, 42726, 42736, 65856, 65909, 66369, 66370, 66378, 66379, 66513, 66518, 74752, 74863, 1114112};
+static const wchar32 CAT_No[] = {178, 180, 185, 186, 188, 191, 2548, 2554, 2930, 2936, 3056, 3059, 3192, 3199, 3416, 3423, 3440, 3449, 3882, 3892, 4969, 4989, 6128, 6138, 6618, 6619, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8544, 8585, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 43056, 43062, 65799, 65844, 65909, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69734, 70113, 70133, 71482, 71484, 71914, 71923, 72794, 72813, 93019, 93026, 119648, 119666, 125127, 125136, 127232, 127245, 1114112};
+static const wchar32 CAT_No_OTHER[] = {178, 180, 185, 186, 188, 191, 2548, 2554, 2930, 2936, 3056, 3059, 3192, 3199, 3416, 3423, 3440, 3449, 3882, 3892, 4969, 4989, 6128, 6138, 6618, 6619, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8544, 8585, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 43056, 43062, 65799, 65844, 65909, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69734, 70113, 70133, 71482, 71484, 71914, 71923, 72794, 72813, 93019, 93026, 119648, 119666, 125127, 125136, 127232, 127245, 1114112};
+static const wchar32 CAT_P[] = {33, 36, 37, 43, 44, 48, 58, 60, 63, 65, 91, 94, 95, 96, 123, 124, 125, 126, 161, 162, 167, 168, 171, 172, 182, 184, 187, 188, 191, 192, 894, 895, 903, 904, 1370, 1376, 1417, 1419, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1548, 1550, 1563, 1564, 1566, 1568, 1642, 1646, 1748, 1749, 1792, 1806, 2039, 2042, 2096, 2111, 2142, 2143, 2404, 2406, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3898, 3902, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4969, 5120, 5121, 5741, 5743, 5787, 5789, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6155, 6468, 6470, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8208, 8232, 8240, 8260, 8261, 8274, 8275, 8287, 8317, 8319, 8333, 8335, 8968, 8972, 9001, 9003, 10088, 10102, 10181, 10183, 10214, 10224, 10627, 10649, 10712, 10716, 10748, 10750, 11513, 11517, 11518, 11520, 11632, 11633, 11776, 11823, 11824, 11845, 12289, 12292, 12296, 12306, 12308, 12320, 12336, 12337, 12349, 12350, 12448, 12449, 12539, 12540, 42238, 42240, 42509, 42512, 42611, 42612, 42622, 42623, 42738, 42744, 43124, 43128, 43214, 43216, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43616, 43742, 43744, 43760, 43762, 44011, 44012, 64830, 64832, 65040, 65050, 65072, 65107, 65108, 65122, 65123, 65124, 65128, 65129, 65130, 65132, 65281, 65284, 65285, 65291, 65292, 65296, 65306, 65308, 65311, 65313, 65339, 65342, 65343, 65344, 65371, 65372, 65373, 65374, 65375, 65382, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69703, 69710, 69819, 69821, 69822, 69826, 69952, 69956, 70004, 70006, 70085, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70200, 70206, 70313, 70314, 70731, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 
71128, 71233, 71236, 71264, 71277, 71484, 71487, 72769, 72774, 72816, 72818, 74864, 74869, 92782, 92784, 92917, 92918, 92983, 92988, 92996, 92997, 113823, 113824, 121479, 121484, 125278, 125280, 1114112};
+static const wchar32 CAT_Pc[] = {95, 96, 8255, 8257, 8276, 8277, 65075, 65077, 65101, 65104, 65343, 65344, 1114112};
+static const wchar32 CAT_Pc_CONNECTOR[] = {95, 96, 8255, 8257, 8276, 8277, 65075, 65077, 65101, 65104, 65343, 65344, 1114112};
+static const wchar32 CAT_Pd[] = {45, 46, 1418, 1419, 1470, 1471, 5120, 5121, 6150, 6151, 8208, 8214, 11799, 11800, 11802, 11803, 11834, 11836, 11840, 11841, 12316, 12317, 12336, 12337, 12448, 12449, 65073, 65075, 65112, 65113, 65123, 65124, 65293, 65294, 1114112};
+static const wchar32 CAT_Pd_DASH[] = {1470, 1471, 8210, 8214, 11834, 11836, 12316, 12317, 12336, 12337, 65073, 65075, 65112, 65113, 1114112};
+static const wchar32 CAT_Pd_HYPHEN[] = {45, 46, 1418, 1419, 5120, 5121, 6150, 6151, 8208, 8210, 11799, 11800, 11802, 11803, 11840, 11841, 12448, 12449, 65123, 65124, 65293, 65294, 1114112};
+static const wchar32 CAT_Pe[] = {41, 42, 93, 94, 125, 126, 3899, 3900, 3901, 3902, 5788, 5789, 8262, 8263, 8318, 8319, 8334, 8335, 8969, 8970, 8971, 8972, 9002, 9003, 10089, 10090, 10091, 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, 10102, 10182, 10183, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10649, 10713, 10714, 10715, 10716, 10749, 10750, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11818, 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12318, 12320, 64830, 64831, 65048, 65049, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65096, 65097, 65114, 65115, 65116, 65117, 65118, 65119, 65289, 65290, 65341, 65342, 65373, 65374, 65376, 65377, 65379, 65380, 1114112};
+static const wchar32 CAT_Pe_END[] = {41, 42, 93, 94, 125, 126, 3899, 3900, 3901, 3902, 5788, 5789, 8262, 8263, 8318, 8319, 8334, 8335, 8969, 8970, 8971, 8972, 9002, 9003, 10089, 10090, 10091, 10092, 10093, 10094, 10097, 10098, 10099, 10100, 10101, 10102, 10182, 10183, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10649, 10713, 10714, 10715, 10716, 10749, 10750, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11818, 12297, 12298, 12299, 12300, 12305, 12306, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 64830, 64831, 65048, 65049, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65096, 65097, 65114, 65115, 65116, 65117, 65118, 65119, 65289, 65290, 65341, 65342, 65373, 65374, 65376, 65377, 1114112};
+static const wchar32 CAT_Pe_QUOTE[] = {10095, 10096, 12301, 12302, 12303, 12304, 12318, 12320, 65090, 65091, 65092, 65093, 65379, 65380, 1114112};
+static const wchar32 CAT_Pf[] = {187, 188, 8217, 8218, 8221, 8222, 8250, 8251, 11779, 11780, 11781, 11782, 11786, 11787, 11789, 11790, 11805, 11806, 11809, 11810, 1114112};
+static const wchar32 CAT_Pf_QUOTE[] = {187, 188, 8221, 8222, 8250, 8251, 11779, 11780, 11781, 11782, 11786, 11787, 11789, 11790, 11805, 11806, 11809, 11810, 1114112};
+static const wchar32 CAT_Pf_SINGLE_QUOTE[] = {8217, 8218, 1114112};
+static const wchar32 CAT_Pi[] = {171, 172, 8216, 8217, 8219, 8221, 8223, 8224, 8249, 8250, 11778, 11779, 11780, 11781, 11785, 11786, 11788, 11789, 11804, 11805, 11808, 11809, 1114112};
+static const wchar32 CAT_Pi_QUOTE[] = {171, 172, 8220, 8221, 8223, 8224, 8249, 8250, 11778, 11779, 11780, 11781, 11785, 11786, 11788, 11789, 11804, 11805, 11808, 11809, 1114112};
+static const wchar32 CAT_Pi_SINGLE_QUOTE[] = {8216, 8217, 8219, 8220, 1114112};
+static const wchar32 CAT_Po[] = {33, 36, 37, 40, 42, 43, 44, 45, 46, 48, 58, 60, 63, 65, 92, 93, 161, 162, 167, 168, 182, 184, 191, 192, 894, 895, 903, 904, 1370, 1376, 1417, 1418, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1548, 1550, 1563, 1564, 1566, 1568, 1642, 1646, 1748, 1749, 1792, 1806, 2039, 2042, 2096, 2111, 2142, 2143, 2404, 2406, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4969, 5741, 5743, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6150, 6151, 6155, 6468, 6470, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8214, 8216, 8224, 8232, 8240, 8249, 8251, 8255, 8257, 8260, 8263, 8274, 8275, 8276, 8277, 8287, 11513, 11517, 11518, 11520, 11632, 11633, 11776, 11778, 11782, 11785, 11787, 11788, 11790, 11799, 11800, 11802, 11803, 11804, 11806, 11808, 11818, 11823, 11824, 11834, 11836, 11840, 11841, 11842, 11843, 11845, 12289, 12292, 12349, 12350, 12539, 12540, 42238, 42240, 42509, 42512, 42611, 42612, 42622, 42623, 42738, 42744, 43124, 43128, 43214, 43216, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43616, 43742, 43744, 43760, 43762, 44011, 44012, 65040, 65047, 65049, 65050, 65072, 65073, 65093, 65095, 65097, 65101, 65104, 65107, 65108, 65112, 65119, 65122, 65128, 65129, 65130, 65132, 65281, 65284, 65285, 65288, 65290, 65291, 65292, 65293, 65294, 65296, 65306, 65308, 65311, 65313, 65340, 65341, 65377, 65378, 65380, 65382, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69703, 69710, 69819, 69821, 69822, 69826, 69952, 69956, 70004, 70006, 70085, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70200, 70206, 70313, 70314, 70731, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 71128, 71233, 71236, 
71264, 71277, 71484, 71487, 72769, 72774, 72816, 72818, 74864, 74869, 92782, 92784, 92917, 92918, 92983, 92988, 92996, 92997, 113823, 113824, 121479, 121484, 125278, 125280, 1114112};
+static const wchar32 CAT_Po_EXTENDER[] = {183, 184, 11825, 11826, 1114112};
+static const wchar32 CAT_Po_HYPHEN[] = {12539, 12540, 65381, 65382, 1114112};
+static const wchar32 CAT_Po_OTHER[] = {35, 36, 37, 39, 42, 43, 47, 48, 64, 65, 92, 93, 161, 162, 167, 168, 182, 183, 191, 192, 1370, 1376, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1549, 1550, 1566, 1567, 1642, 1646, 1792, 1793, 1802, 1806, 2039, 2040, 2096, 2111, 2142, 2143, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4962, 4968, 4969, 5741, 5742, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6145, 6149, 6150, 6151, 6152, 6154, 6155, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8214, 8216, 8224, 8230, 8231, 8232, 8240, 8242, 8248, 8249, 8251, 8252, 8254, 8255, 8257, 8260, 8266, 8271, 8272, 8274, 8275, 8276, 8277, 8279, 8280, 8287, 11516, 11517, 11519, 11520, 11632, 11633, 11776, 11778, 11782, 11785, 11787, 11788, 11790, 11799, 11800, 11802, 11803, 11804, 11806, 11808, 11818, 11822, 11824, 11825, 11827, 11828, 11830, 11834, 11837, 11840, 11843, 11845, 12291, 12292, 12349, 12350, 42611, 42612, 42622, 42623, 42738, 42739, 43124, 43128, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43613, 43742, 43744, 43760, 43762, 44011, 44012, 65072, 65073, 65093, 65095, 65097, 65101, 65119, 65122, 65128, 65129, 65130, 65132, 65283, 65284, 65285, 65287, 65290, 65291, 65295, 65296, 65312, 65313, 65340, 65341, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68182, 68184, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69705, 69710, 69819, 69821, 69822, 69824, 69952, 69953, 70004, 70006, 70087, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70202, 70206, 70313, 70314, 70734, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 71106, 71108, 71128, 71235, 71236, 71264, 71277, 71484, 71487, 72771, 72774, 72816, 72818, 74864, 74865, 74867, 74869, 92983, 92988, 92996, 92997, 
121483, 121484, 1114112};
+static const wchar32 CAT_Po_QUOTE[] = {34, 35, 8243, 8245, 8246, 8248, 8279, 8280, 65282, 65283, 1114112};
+static const wchar32 CAT_Po_SINGLE_QUOTE[] = {39, 40, 8242, 8243, 8245, 8246, 65287, 65288, 1114112};
+static const wchar32 CAT_Po_TERMINAL[] = {33, 34, 44, 45, 46, 47, 58, 60, 63, 64, 894, 895, 903, 904, 1417, 1418, 1548, 1549, 1563, 1564, 1567, 1568, 1748, 1749, 1793, 1802, 2040, 2042, 2404, 2406, 4962, 4968, 5742, 5743, 6145, 6149, 6152, 6154, 6468, 6470, 8230, 8231, 8252, 8254, 8263, 8266, 8271, 8272, 11513, 11516, 11518, 11519, 11822, 11823, 11826, 11827, 11828, 11830, 11836, 11837, 11841, 11842, 12289, 12291, 42238, 42240, 42509, 42512, 42739, 42744, 43214, 43216, 43613, 43616, 65040, 65047, 65049, 65050, 65104, 65107, 65108, 65112, 65281, 65282, 65292, 65293, 65294, 65295, 65306, 65308, 65311, 65312, 65377, 65378, 65380, 65381, 68182, 68184, 69703, 69705, 69824, 69826, 69953, 69956, 70085, 70087, 70200, 70202, 70731, 70734, 71106, 71108, 71233, 71235, 72769, 72771, 74865, 74867, 92782, 92784, 92917, 92918, 113823, 113824, 121479, 121483, 125278, 125280, 1114112};
+static const wchar32 CAT_Ps[] = {40, 41, 91, 92, 123, 124, 3898, 3899, 3900, 3901, 5787, 5788, 8218, 8219, 8222, 8223, 8261, 8262, 8317, 8318, 8333, 8334, 8968, 8969, 8970, 8971, 9001, 9002, 10088, 10089, 10090, 10091, 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, 10181, 10182, 10214, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10627, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10712, 10713, 10714, 10715, 10748, 10749, 11810, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11842, 11843, 12296, 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12317, 12318, 64831, 64832, 65047, 65048, 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65090, 65091, 65092, 65095, 65096, 65113, 65114, 65115, 65116, 65117, 65118, 65288, 65289, 65339, 65340, 65371, 65372, 65375, 65376, 65378, 65379, 1114112};
+static const wchar32 CAT_Ps_QUOTE[] = {8222, 8223, 10094, 10095, 11842, 11843, 12300, 12301, 12302, 12303, 12317, 12318, 65089, 65090, 65091, 65092, 65378, 65379, 1114112};
+static const wchar32 CAT_Ps_SINGLE_QUOTE[] = {8218, 8219, 1114112};
+static const wchar32 CAT_Ps_START[] = {40, 41, 91, 92, 123, 124, 3898, 3899, 3900, 3901, 5787, 5788, 8261, 8262, 8317, 8318, 8333, 8334, 8968, 8969, 8970, 8971, 9001, 9002, 10088, 10089, 10090, 10091, 10092, 10093, 10096, 10097, 10098, 10099, 10100, 10101, 10181, 10182, 10214, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10627, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10712, 10713, 10714, 10715, 10748, 10749, 11810, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 12296, 12297, 12298, 12299, 12304, 12305, 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 64831, 64832, 65047, 65048, 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65095, 65096, 65113, 65114, 65115, 65116, 65117, 65118, 65288, 65289, 65339, 65340, 65371, 65372, 65375, 65376, 1114112};
+static const wchar32 CAT_S[] = {36, 37, 43, 44, 60, 63, 94, 95, 96, 97, 124, 125, 126, 127, 162, 167, 168, 170, 172, 173, 174, 178, 180, 181, 184, 185, 215, 216, 247, 248, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 1014, 1015, 1154, 1155, 1421, 1424, 1542, 1545, 1547, 1548, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2546, 2548, 2554, 2556, 2801, 2802, 2928, 2929, 3059, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3647, 3648, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6107, 6108, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 8260, 8261, 8274, 8275, 8314, 8317, 8330, 8333, 8352, 8383, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8512, 8517, 8522, 8526, 8527, 8528, 8586, 8588, 8592, 8968, 8972, 9001, 9003, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 10088, 10132, 10181, 10183, 10214, 10224, 10627, 10649, 10712, 10716, 10748, 10750, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12443, 12445, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 42752, 42775, 42784, 42786, 42889, 42891, 43048, 43052, 43062, 43066, 43639, 43642, 43867, 43868, 64297, 64298, 64434, 64450, 65020, 65022, 65122, 65123, 65124, 65127, 65129, 65130, 65284, 65285, 65291, 65292, 65308, 65311, 65342, 65343, 65344, 65345, 65372, 65373, 65374, 65375, 65504, 65511, 65512, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 
71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112};
+static const wchar32 CAT_Sc[] = {36, 37, 162, 166, 1423, 1424, 1547, 1548, 2546, 2548, 2555, 2556, 2801, 2802, 3065, 3066, 3647, 3648, 6107, 6108, 8352, 8383, 43064, 43065, 65020, 65021, 65129, 65130, 65284, 65285, 65504, 65506, 65509, 65511, 1114112};
+static const wchar32 CAT_Sc_CURRENCY[] = {36, 37, 162, 166, 1423, 1424, 1547, 1548, 2546, 2548, 2555, 2556, 2801, 2802, 3065, 3066, 3647, 3648, 6107, 6108, 8352, 8383, 43064, 43065, 65020, 65021, 65129, 65130, 65284, 65285, 65504, 65506, 65509, 65511, 1114112};
+static const wchar32 CAT_Sk[] = {94, 95, 96, 97, 168, 169, 175, 176, 180, 181, 184, 185, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 12443, 12445, 42752, 42775, 42784, 42786, 42889, 42891, 43867, 43868, 64434, 64450, 65342, 65343, 65344, 65345, 65507, 65508, 127995, 128000, 1114112};
+static const wchar32 CAT_Sk_MODIFIER[] = {94, 95, 96, 97, 168, 169, 175, 176, 180, 181, 184, 185, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 12443, 12445, 42752, 42775, 42784, 42786, 42889, 42891, 43867, 43868, 64434, 64450, 65342, 65343, 65344, 65345, 65507, 65508, 127995, 128000, 1114112};
+static const wchar32 CAT_Sm[] = {43, 44, 60, 63, 124, 125, 126, 127, 172, 173, 177, 178, 215, 216, 247, 248, 1014, 1015, 1542, 1545, 8260, 8261, 8274, 8275, 8314, 8317, 8330, 8333, 8472, 8473, 8512, 8517, 8523, 8524, 8592, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8992, 8994, 9084, 9085, 9115, 9140, 9180, 9186, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10176, 10181, 10183, 10214, 10224, 10240, 10496, 10627, 10649, 10712, 10716, 10748, 10750, 11008, 11056, 11077, 11079, 11085, 64297, 64298, 65122, 65123, 65124, 65127, 65291, 65292, 65308, 65311, 65372, 65373, 65374, 65375, 65506, 65507, 65513, 65517, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 126704, 126706, 1114112};
+static const wchar32 CAT_Sm_MATH[] = {43, 44, 60, 63, 124, 125, 126, 127, 172, 173, 177, 178, 215, 216, 247, 248, 1014, 1015, 1542, 1545, 8260, 8261, 8274, 8275, 8314, 8315, 8316, 8317, 8330, 8331, 8332, 8333, 8472, 8473, 8512, 8517, 8523, 8524, 8592, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8722, 8723, 8960, 8992, 8994, 9084, 9085, 9115, 9140, 9180, 9186, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10176, 10181, 10183, 10214, 10224, 10240, 10496, 10627, 10649, 10712, 10716, 10748, 10750, 11008, 11056, 11077, 11079, 11085, 64297, 64298, 65122, 65123, 65124, 65127, 65291, 65292, 65308, 65311, 65372, 65373, 65374, 65375, 65506, 65507, 65513, 65517, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 126704, 126706, 1114112};
+static const wchar32 CAT_Sm_MINUS[] = {8315, 8316, 8331, 8332, 8722, 8723, 1114112};
+static const wchar32 CAT_So[] = {166, 167, 169, 170, 174, 175, 176, 177, 1154, 1155, 1421, 1423, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2554, 2555, 2928, 2929, 3059, 3065, 3066, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8472, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8522, 8523, 8524, 8526, 8527, 8528, 8586, 8588, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, 8992, 8994, 9001, 9003, 9084, 9085, 9115, 9140, 9180, 9186, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10088, 10132, 10176, 10240, 10496, 11008, 11056, 11077, 11079, 11085, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 43048, 43052, 43062, 43064, 43065, 43066, 43639, 43642, 65021, 65022, 65508, 65509, 65512, 65513, 65517, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 
127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 127995, 128000, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112};
+static const wchar32 CAT_So_OTHER[] = {166, 167, 169, 170, 174, 175, 176, 177, 1154, 1155, 1421, 1423, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2554, 2555, 2928, 2929, 3059, 3065, 3066, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8472, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8522, 8523, 8524, 8526, 8527, 8528, 8586, 8588, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, 8992, 8994, 9001, 9003, 9084, 9085, 9115, 9140, 9180, 9186, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10088, 10132, 10176, 10240, 10496, 11008, 11056, 11077, 11079, 11085, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 43048, 43052, 43062, 43064, 43065, 43066, 43639, 43642, 65021, 65022, 65508, 65509, 65512, 65513, 65517, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 
127222, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 127995, 128000, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112};
+static const wchar32 CAT_Z[] = {32, 33, 160, 161, 5760, 5761, 8192, 8204, 8232, 8234, 8239, 8240, 8287, 8288, 12288, 12289, 1114112};
+static const wchar32 CAT_Zl[] = {8232, 8233, 1114112};
+static const wchar32 CAT_Zl_LINE[] = {8232, 8233, 1114112};
+static const wchar32 CAT_Zp[] = {8233, 8234, 1114112};
+static const wchar32 CAT_Zp_PARAGRAPH[] = {8233, 8234, 1114112};
+static const wchar32 CAT_Zs[] = {32, 33, 160, 161, 5760, 5761, 8192, 8204, 8239, 8240, 8287, 8288, 12288, 12289, 1114112};
+static const wchar32 CAT_Zs_SPACE[] = {32, 33, 160, 161, 5760, 5761, 8192, 8203, 8239, 8240, 8287, 8288, 12288, 12289, 1114112};
+static const wchar32 CAT_Zs_ZWSPACE[] = {8203, 8204, 1114112};
+
+// Lookup table used by GetCategoryRanges(WC_TYPE): the numeric value of the
+// category is the index into this array.
+// Every entry is {number of slots, pointer to the slots}. The slots are the
+// boundaries of half-open code point ranges [from, to) followed by the terminal
+// value 1114112 (0x110000); the slot count includes that terminal value.
+// The {0, nullptr} entry is a placeholder for a category id that has no ranges.
+static const TCategoryRanges CATEGORY_RANGES[] = {
+ {1274, CAT_Cn_UNASSIGNED},
+ {1255, CAT_Lu_UPPER},
+ {1267, CAT_Ll_LOWER},
+ {21, CAT_Lt_TITLE},
+ {59, CAT_Lm_EXTENDER},
+ {63, CAT_Lm_LETTER},
+ {849, CAT_Lo_OTHER},
+ {23, CAT_Lo_IDEOGRAPH},
+ {13, CAT_Lo_KATAKANA},
+ {7, CAT_Lo_HIRAGANA},
+ {5, CAT_Lo_LEADING},
+ {5, CAT_Lo_VOWEL},
+ {5, CAT_Lo_TRAILING},
+ {571, CAT_Mn_NONSPACING},
+ {11, CAT_Me_ENCLOSING},
+ {311, CAT_Mc_SPACING},
+ {109, CAT_Nd_DIGIT},
+ {19, CAT_Nl_LETTER},
+ {7, CAT_Nl_IDEOGRAPH},
+ {121, CAT_No_OTHER},
+ {15, CAT_Zs_SPACE},
+ {3, CAT_Zs_ZWSPACE},
+ {3, CAT_Zl_LINE},
+ {3, CAT_Zp_PARAGRAPH},
+ {7, CAT_Cc_ASCII},
+ {3, CAT_Cc_SPACE},
+ {3, CAT_Cc_SEPARATOR},
+ {43, CAT_Cf_FORMAT},
+ {5, CAT_Cf_JOIN},
+ {19, CAT_Cf_BIDI},
+ {3, CAT_Cf_ZWNBSP},
+ {3, CAT_Cs_LOW},
+ {3, CAT_Cs_HIGH},
+ {15, CAT_Pd_DASH},
+ {23, CAT_Pd_HYPHEN},
+ {131, CAT_Ps_START},
+ {19, CAT_Ps_QUOTE},
+ {131, CAT_Pe_END},
+ {15, CAT_Pe_QUOTE},
+ {21, CAT_Pi_QUOTE},
+ {19, CAT_Pf_QUOTE},
+ {13, CAT_Pc_CONNECTOR},
+ {307, CAT_Po_OTHER},
+ {11, CAT_Po_QUOTE},
+ {129, CAT_Po_TERMINAL},
+ {5, CAT_Po_EXTENDER},
+ {5, CAT_Po_HYPHEN},
+ {135, CAT_Sm_MATH},
+ {7, CAT_Sm_MINUS},
+ {35, CAT_Sc_CURRENCY},
+ {59, CAT_Sk_MODIFIER},
+ {349, CAT_So_OTHER},
+ {3, CAT_Ps_SINGLE_QUOTE},
+ {0, nullptr},
+ {5, CAT_Pi_SINGLE_QUOTE},
+ {3, CAT_Pf_SINGLE_QUOTE},
+ {9, CAT_Po_SINGLE_QUOTE},
+};
+
+// Registry that maps a category name ("L", "Lu", "Lu_UPPER", ...) to its
+// predefined range table. The constructor registers every known name, so a
+// constructed instance is ready for lookups through Get().
+struct TNamedCategoryRanges: THashMap<TStringBuf, TCategoryRanges> {
+ typedef THashMap<TStringBuf, TCategoryRanges> TBase;
+
+ // Registers all names; the last argument of each Insert() is the number of
+ // slots in the corresponding CAT_* array (terminal value included).
+ TNamedCategoryRanges() {
+ Insert(TStringBuf("C"), CAT_C, 1284);
+ Insert(TStringBuf("Cc"), CAT_Cc, 5);
+ Insert(TStringBuf("Cc_ASCII"), CAT_Cc_ASCII, 7);
+ Insert(TStringBuf("Cc_SEPARATOR"), CAT_Cc_SEPARATOR, 3);
+ Insert(TStringBuf("Cc_SPACE"), CAT_Cc_SPACE, 3);
+ Insert(TStringBuf("Cf"), CAT_Cf, 37);
+ Insert(TStringBuf("Cf_BIDI"), CAT_Cf_BIDI, 19);
+ Insert(TStringBuf("Cf_FORMAT"), CAT_Cf_FORMAT, 43);
+ Insert(TStringBuf("Cf_JOIN"), CAT_Cf_JOIN, 5);
+ Insert(TStringBuf("Cf_ZWNBSP"), CAT_Cf_ZWNBSP, 3);
+ Insert(TStringBuf("Cn"), CAT_Cn, 1274);
+ Insert(TStringBuf("Cn_UNASSIGNED"), CAT_Cn_UNASSIGNED, 1274);
+ Insert(TStringBuf("Co"), CAT_Co, 1274);
+ Insert(TStringBuf("Co_PRIVATE"), CAT_Co_PRIVATE, 1274);
+ Insert(TStringBuf("Cs"), CAT_Cs, 3);
+ Insert(TStringBuf("Cs_HIGH"), CAT_Cs_HIGH, 3);
+ Insert(TStringBuf("Cs_LOW"), CAT_Cs_LOW, 3);
+ Insert(TStringBuf("L"), CAT_L, 1143);
+ Insert(TStringBuf("Ll"), CAT_Ll, 1267);
+ Insert(TStringBuf("Ll_LOWER"), CAT_Ll_LOWER, 1267);
+ Insert(TStringBuf("Lm"), CAT_Lm, 115);
+ Insert(TStringBuf("Lm_EXTENDER"), CAT_Lm_EXTENDER, 59);
+ Insert(TStringBuf("Lm_LETTER"), CAT_Lm_LETTER, 63);
+ Insert(TStringBuf("Lo"), CAT_Lo, 891);
+ Insert(TStringBuf("Lo_HIRAGANA"), CAT_Lo_HIRAGANA, 7);
+ Insert(TStringBuf("Lo_IDEOGRAPH"), CAT_Lo_IDEOGRAPH, 23);
+ Insert(TStringBuf("Lo_KATAKANA"), CAT_Lo_KATAKANA, 13);
+ Insert(TStringBuf("Lo_LEADING"), CAT_Lo_LEADING, 5);
+ Insert(TStringBuf("Lo_OTHER"), CAT_Lo_OTHER, 849);
+ Insert(TStringBuf("Lo_TRAILING"), CAT_Lo_TRAILING, 5);
+ Insert(TStringBuf("Lo_VOWEL"), CAT_Lo_VOWEL, 5);
+ Insert(TStringBuf("Lt"), CAT_Lt, 21);
+ Insert(TStringBuf("Lt_TITLE"), CAT_Lt_TITLE, 21);
+ Insert(TStringBuf("Lu"), CAT_Lu, 1255);
+ Insert(TStringBuf("Lu_UPPER"), CAT_Lu_UPPER, 1255);
+ Insert(TStringBuf("M"), CAT_M, 501);
+ Insert(TStringBuf("Mc"), CAT_Mc, 311);
+ Insert(TStringBuf("Mc_SPACING"), CAT_Mc_SPACING, 311);
+ Insert(TStringBuf("Me"), CAT_Me, 11);
+ Insert(TStringBuf("Me_ENCLOSING"), CAT_Me_ENCLOSING, 11);
+ Insert(TStringBuf("Mn"), CAT_Mn, 571);
+ Insert(TStringBuf("Mn_NONSPACING"), CAT_Mn_NONSPACING, 571);
+ Insert(TStringBuf("N"), CAT_N, 231);
+ Insert(TStringBuf("Nd"), CAT_Nd, 109);
+ Insert(TStringBuf("Nd_DIGIT"), CAT_Nd_DIGIT, 109);
+ Insert(TStringBuf("Nl"), CAT_Nl, 25);
+ Insert(TStringBuf("Nl_IDEOGRAPH"), CAT_Nl_IDEOGRAPH, 7);
+ Insert(TStringBuf("Nl_LETTER"), CAT_Nl_LETTER, 19);
+ Insert(TStringBuf("No"), CAT_No, 121);
+ Insert(TStringBuf("No_OTHER"), CAT_No_OTHER, 121);
+ Insert(TStringBuf("P"), CAT_P, 337);
+ Insert(TStringBuf("Pc"), CAT_Pc, 13);
+ Insert(TStringBuf("Pc_CONNECTOR"), CAT_Pc_CONNECTOR, 13);
+ Insert(TStringBuf("Pd"), CAT_Pd, 35);
+ Insert(TStringBuf("Pd_DASH"), CAT_Pd_DASH, 15);
+ Insert(TStringBuf("Pd_HYPHEN"), CAT_Pd_HYPHEN, 23);
+ Insert(TStringBuf("Pe"), CAT_Pe, 145);
+ Insert(TStringBuf("Pe_END"), CAT_Pe_END, 131);
+ Insert(TStringBuf("Pe_QUOTE"), CAT_Pe_QUOTE, 15);
+ Insert(TStringBuf("Pf"), CAT_Pf, 21);
+ Insert(TStringBuf("Pf_QUOTE"), CAT_Pf_QUOTE, 19);
+ Insert(TStringBuf("Pf_SINGLE_QUOTE"), CAT_Pf_SINGLE_QUOTE, 3);
+ Insert(TStringBuf("Pi"), CAT_Pi, 23);
+ Insert(TStringBuf("Pi_QUOTE"), CAT_Pi_QUOTE, 21);
+ Insert(TStringBuf("Pi_SINGLE_QUOTE"), CAT_Pi_SINGLE_QUOTE, 5);
+ Insert(TStringBuf("Po"), CAT_Po, 331);
+ Insert(TStringBuf("Po_EXTENDER"), CAT_Po_EXTENDER, 5);
+ Insert(TStringBuf("Po_HYPHEN"), CAT_Po_HYPHEN, 5);
+ Insert(TStringBuf("Po_OTHER"), CAT_Po_OTHER, 307);
+ Insert(TStringBuf("Po_QUOTE"), CAT_Po_QUOTE, 11);
+ Insert(TStringBuf("Po_SINGLE_QUOTE"), CAT_Po_SINGLE_QUOTE, 9);
+ Insert(TStringBuf("Po_TERMINAL"), CAT_Po_TERMINAL, 129);
+ Insert(TStringBuf("Ps"), CAT_Ps, 151);
+ Insert(TStringBuf("Ps_QUOTE"), CAT_Ps_QUOTE, 19);
+ Insert(TStringBuf("Ps_SINGLE_QUOTE"), CAT_Ps_SINGLE_QUOTE, 3);
+ Insert(TStringBuf("Ps_START"), CAT_Ps_START, 131);
+ Insert(TStringBuf("S"), CAT_S, 437);
+ Insert(TStringBuf("Sc"), CAT_Sc, 35);
+ Insert(TStringBuf("Sc_CURRENCY"), CAT_Sc_CURRENCY, 35);
+ Insert(TStringBuf("Sk"), CAT_Sk, 59);
+ Insert(TStringBuf("Sk_MODIFIER"), CAT_Sk_MODIFIER, 59);
+ Insert(TStringBuf("Sm"), CAT_Sm, 129);
+ Insert(TStringBuf("Sm_MATH"), CAT_Sm_MATH, 135);
+ Insert(TStringBuf("Sm_MINUS"), CAT_Sm_MINUS, 7);
+ Insert(TStringBuf("So"), CAT_So, 349);
+ Insert(TStringBuf("So_OTHER"), CAT_So_OTHER, 349);
+ Insert(TStringBuf("Z"), CAT_Z, 17);
+ Insert(TStringBuf("Zl"), CAT_Zl, 3);
+ Insert(TStringBuf("Zl_LINE"), CAT_Zl_LINE, 3);
+ Insert(TStringBuf("Z"), CAT_Z, 17);
+ }
+
+ // Stores {count, data} under the given name. An already registered name is
+ // left as is (map insert does not overwrite).
+ inline void Insert(const TStringBuf& category, const wchar32* data, size_t count) {
+ TCategoryRanges categoryRanges;
+ categoryRanges.Count = count;
+ categoryRanges.Data = data;
+ TBase::insert(::std::make_pair(category, categoryRanges));
+ }
+
+ // Returns the ranges registered under 'category'.
+ // Throws yexception if the name is unknown.
+ inline const TCategoryRanges& Get(const TStringBuf& category) const {
+ TBase::const_iterator i = TBase::find(category);
+ if (i == TBase::end())
+ throw yexception() << "Unknown Unicode category name '" << category << "'";
+ return i->second;
+ }
+};
+
+// Returns the predefined ranges for the category with the given numeric id.
+// The id must be a valid index into CATEGORY_RANGES (checked only by assertion).
+const TCategoryRanges& GetCategoryRanges(WC_TYPE cat) {
+    const size_t index = static_cast<size_t>(cat);
+    Y_ASSERT(index < Y_ARRAY_SIZE(CATEGORY_RANGES));
+    return CATEGORY_RANGES[index];
+}
+
+// Returns the predefined ranges for the category with the given name.
+// Throws yexception (from TNamedCategoryRanges::Get) if the name is unknown.
+const TCategoryRanges& GetCategoryRanges(const TStringBuf& category) {
+    const TNamedCategoryRanges& registry = Default<TNamedCategoryRanges>();
+    return registry.Get(category);
+}
+
+} // NPrivate
+} // NUnicode
+
diff --git a/library/cpp/unicode/set/quoted_pair.cpp b/library/cpp/unicode/set/quoted_pair.cpp
new file mode 100644
index 0000000000..7675dbaa74
--- /dev/null
+++ b/library/cpp/unicode/set/quoted_pair.cpp
@@ -0,0 +1,53 @@
+#include "quoted_pair.h"
+
+#include <util/generic/strbuf.h>
+
+namespace NUnicode {
+ // Resolves the character that follows a backslash.
+ // For class escapes (\s \S \w \W \d \D) the matching category (or its
+ // complement) is added to 'set', 'symbol' is left untouched and UQPT_SET is
+ // returned. For everything else 'symbol' receives the resolved code point
+ // (control character for \a \b \t \n \v \f \r, the character itself
+ // otherwise) and UQPT_SYMBOL is returned.
+ EUnicodeQuotedPairType ResolveUnicodeQuotedPair(wchar32 escapedSymbol, wchar32& symbol, TUnicodeSet& set) {
+     // Adds every code point that is NOT in the named category
+     const auto addComplement = [&set](const TStringBuf& categoryName) {
+         set.Add(TUnicodeSet().AddCategory(categoryName).Invert());
+     };
+
+     // Class escapes
+     switch (escapedSymbol) {
+         case wchar32('s'):
+             set.AddCategory(TStringBuf("Z"));
+             return UQPT_SET;
+         case wchar32('S'):
+             addComplement(TStringBuf("Z"));
+             return UQPT_SET;
+         case wchar32('w'):
+             set.AddCategory(TStringBuf("L"));
+             return UQPT_SET;
+         case wchar32('W'):
+             addComplement(TStringBuf("L"));
+             return UQPT_SET;
+         case wchar32('d'):
+             set.AddCategory(TStringBuf("Nd"));
+             return UQPT_SET;
+         case wchar32('D'):
+             addComplement(TStringBuf("Nd"));
+             return UQPT_SET;
+         default:
+             break;
+     }
+
+     // Single-symbol escapes; unknown escapes stand for the character itself
+     wchar32 resolved = escapedSymbol;
+     switch (escapedSymbol) {
+         case wchar32('a'): // U+0007 Bell
+             resolved = wchar32('\a');
+             break;
+         case wchar32('b'): // U+0008 Backspace
+             resolved = wchar32('\b');
+             break;
+         case wchar32('t'): // U+0009 Horizontal Tab
+             resolved = wchar32('\t');
+             break;
+         case wchar32('n'): // U+000A Line Feed
+             resolved = wchar32('\n');
+             break;
+         case wchar32('v'): // U+000B Vertical Tab
+             resolved = wchar32('\v');
+             break;
+         case wchar32('f'): // U+000C Form Feed
+             resolved = wchar32('\f');
+             break;
+         case wchar32('r'): // U+000D Carriage Return
+             resolved = wchar32('\r');
+             break;
+         default:
+             break;
+     }
+     symbol = resolved;
+     return UQPT_SYMBOL;
+ }
+
+}
diff --git a/library/cpp/unicode/set/quoted_pair.h b/library/cpp/unicode/set/quoted_pair.h
new file mode 100644
index 0000000000..a951071f8c
--- /dev/null
+++ b/library/cpp/unicode/set/quoted_pair.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "unicode_set.h"
+
+#include <util/system/defaults.h>
+
+namespace NUnicode {
+ // Kind of result produced by ResolveUnicodeQuotedPair()
+ enum EUnicodeQuotedPairType {
+ UQPT_SYMBOL, // the escape denotes a single code point, returned via 'symbol'
+ UQPT_SET, // the escape denotes a character class, added to 'set'
+ };
+
+ // Resolves the character following a backslash ('escapedSymbol').
+ // Depending on the returned type, the result is either written to 'symbol'
+ // or added to 'set'; the other output argument is not modified.
+ EUnicodeQuotedPairType ResolveUnicodeQuotedPair(wchar32 escapedSymbol, wchar32& symbol, TUnicodeSet& set);
+
+}
diff --git a/library/cpp/unicode/set/set.cpp b/library/cpp/unicode/set/set.cpp
new file mode 100644
index 0000000000..5f8caa3140
--- /dev/null
+++ b/library/cpp/unicode/set/set.cpp
@@ -0,0 +1,6 @@
+#include "set.h"
+
+#include "category_ranges.h"
+#include "unicode_set_lexer.h"
+#include "unicode_set_parser.h"
+#include "unicode_set_token.h"
diff --git a/library/cpp/unicode/set/set.h b/library/cpp/unicode/set/set.h
new file mode 100644
index 0000000000..cad099694a
--- /dev/null
+++ b/library/cpp/unicode/set/set.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#include "unicode_set.h"
+#include "quoted_pair.h"
diff --git a/library/cpp/unicode/set/unicode_set.cpp b/library/cpp/unicode/set/unicode_set.cpp
new file mode 100644
index 0000000000..855bcdd9a6
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set.cpp
@@ -0,0 +1,480 @@
+#include "unicode_set.h"
+
+#include "category_ranges.h"
+#include "unicode_set_parser.h"
+
+#include <util/ysaveload.h>
+#include <util/charset/wide.h>
+#include <util/digest/numeric.h>
+#include <util/generic/buffer.h>
+#include <util/generic/yexception.h>
+#include <util/stream/format.h>
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+#include <util/string/cast.h>
+
+// The original idea of unicode set implementation was taken from the icu::UnicodeSet.
+// UnicodeSet has a set of ranges [from, to), where upper boundary is exclusive.
+// The list of ranges always has a terminal value CODEPOINT_HIGH at the end.
+
+namespace NUnicode {
+ namespace NPrivate {
+ // Clamps 'c' to the largest value representable in a set (CODEPOINT_HIGH - 1)
+ inline wchar32 Bound(wchar32 c) {
+     if (c < TUnicodeSet::CODEPOINT_HIGH) {
+         return c;
+     }
+     return TUnicodeSet::CODEPOINT_HIGH - 1;
+ }
+
+ // Throws yexception unless the numeric value of 'c' is below CCL_NUM
+ inline void CheckWcType(WC_TYPE c) {
+     const size_t id = static_cast<size_t>(c);
+     if (id < CCL_NUM) {
+         return;
+     }
+     throw yexception() << "Category ID must be less than CCL_NUM (" << static_cast<size_t>(CCL_NUM) << "), specified: " << id;
+ }
+
+ }
+
+ // Returns the smallest value i >= from such that 'c' < Ranges[i].
+ // Some examples:
+ // GetRangeItem(c, 0)
+ // set Ranges[] c=0 1 3 4 7 8
+ // === ============== ===========
+ // [] [0x110000] 0 0 0 0 0 0
+ // [:Any:] [0, 0x110000] 1 1 1 1 1 1
+ // [\u0000-\u0003] [0, 4, 0x110000] 1 1 1 2 2 2
+ // [\u0004-\u0007] [4, 8, 0x110000] 0 0 0 1 1 2
+ //
+ // So, if method returns an odd value then 'c' falls to the {Range[i-1],Range[i]} range.
+ size_t TUnicodeSet::GetRangeItem(wchar32 c, size_t from) const {
+ Y_ASSERT(Valid());
+ Y_ASSERT(from < Length);
+ // Fast path: 'c' lies before the first slot that is examined
+ if (c < Ranges[from])
+ return from;
+ size_t lo = from;
+ size_t hi = Length - 1; // index of the last slot
+ // Nothing to search, or 'c' is not below the last-but-one slot:
+ // the answer is the last slot
+ if (lo >= hi || c >= Ranges[hi - 1]) {
+ return hi;
+ }
+ // Binary search. At this point Ranges[lo] <= c < Ranges[hi - 1]; the loop
+ // narrows (lo, hi] keeping Ranges[lo] <= c < Ranges[hi] until lo and hi
+ // are adjacent, then hi is the smallest index with c < Ranges[hi]
+ for (;;) {
+ size_t i = (lo + hi) >> 1;
+ if (i == lo) {
+ break;
+ } else if (c < Ranges[i]) {
+ hi = i;
+ } else {
+ lo = i;
+ }
+ }
+ return hi;
+ }
+
+ // Returns a writable buffer that holds the current slots and has room for at
+ // least 'capacity' slots.
+ // The current buffer is reused only when it is large enough AND exclusively
+ // owned by this set. Static data and buffers shared with other sets are never
+ // handed out for writing; they are copied to a fresh private buffer instead.
+ wchar32* TUnicodeSet::EnsureCapacity(size_t capacity) {
+     if (capacity <= Capacity && !IsStatic() && !IsShared()) {
+         return const_cast<wchar32*>(Ranges);
+     }
+
+     // All existing slots are copied below, so the new buffer must never be
+     // smaller than Length (a static set has Capacity == 0 but a big Length)
+     const size_t newCapacity = capacity < Length ? Length : capacity;
+
+     TDynamicBuffer buf = new wchar32[newCapacity];
+     Copy<const wchar32*, wchar32*>(Ranges, Ranges + Length, buf.Get());
+     // The previous buffer (if any) is released when 'buf' goes out of scope
+     DoSwap(buf, DynBuffer);
+     Ranges = DynBuffer.Get();
+     Capacity = newCapacity;
+     return DynBuffer.Get();
+ }
+
+ // Opens a gap of 'count' slots at index 'pos' by moving the tail of the
+ // buffer to the right. Returns the pointer to the first slot of the gap;
+ // the caller is expected to fill the gap.
+ wchar32* TUnicodeSet::InsertRangeSlots(const size_t pos, const size_t count) {
+     Y_ASSERT(pos < Length);
+     wchar32* const slots = EnsureCapacity(Length + count);
+     // Move from the end so that overlapping source and target are handled
+     for (size_t idx = Length; idx > pos; --idx) {
+         slots[idx - 1 + count] = slots[idx - 1];
+     }
+     Length += count;
+     return slots + pos;
+ }
+
+ // Removes 'count' slots starting at index 'pos' by moving the tail of the
+ // buffer to the left.
+ void TUnicodeSet::EraseRangeSlots(const size_t pos, const size_t count) {
+     Y_ASSERT(pos < Length);
+     Y_ASSERT(pos + count <= Length);
+     wchar32* const slots = EnsureWritable();
+     for (size_t idx = pos + count; idx < Length; ++idx) {
+         slots[idx - count] = slots[idx];
+     }
+     Length -= count;
+ }
+
+ // All constructors start from the in-object ShortBuffer with zero length and
+ // then delegate to Clear() or to the matching Set() overload.
+
+ // Creates an empty set (only the terminal slot)
+ TUnicodeSet::TUnicodeSet()
+ : Ranges(ShortBuffer)
+ , Length(0)
+ , Capacity(Y_ARRAY_SIZE(ShortBuffer))
+ {
+ Clear();
+ }
+
+ // Creates a copy of 's' (see Set(const TUnicodeSet&))
+ TUnicodeSet::TUnicodeSet(const TUnicodeSet& s)
+ : Ranges(ShortBuffer)
+ , Length(0)
+ , Capacity(Y_ARRAY_SIZE(ShortBuffer))
+ {
+ Set(s);
+ }
+
+ // from, to - inclusive
+ TUnicodeSet::TUnicodeSet(wchar32 from, wchar32 to)
+ : Ranges(ShortBuffer)
+ , Length(0)
+ , Capacity(Y_ARRAY_SIZE(ShortBuffer))
+ {
+ Set(from, to);
+ }
+
+ // Creates a set of the symbols that occur in 's'
+ TUnicodeSet::TUnicodeSet(const TWtringBuf& s)
+ : Ranges(ShortBuffer)
+ , Length(0)
+ , Capacity(Y_ARRAY_SIZE(ShortBuffer))
+ {
+ Set(s);
+ }
+
+ // Creates a set with the predefined ranges of category 'c'
+ TUnicodeSet::TUnicodeSet(WC_TYPE c)
+ : Ranges(ShortBuffer)
+ , Length(0)
+ , Capacity(Y_ARRAY_SIZE(ShortBuffer))
+ {
+ Set(c);
+ }
+
+ // Adds every [from, to) pair of a predefined range table to this set.
+ // The trailing terminal slot of the table is not part of any pair and is skipped.
+ void TUnicodeSet::AddPredefRanges(const NPrivate::TCategoryRanges& ranges) {
+     if (!(ranges.Count > 0)) {
+         return;
+     }
+     for (size_t upper = 1; upper < ranges.Count; upper += 2) {
+         // Table bounds are exclusive, Add() takes an inclusive upper bound
+         Add(ranges.Data[upper - 1], ranges.Data[upper] - 1);
+     }
+ }
+
+ // Adds all code points of 's' to this set (set union).
+ // An empty target simply becomes a copy of 's'.
+ TUnicodeSet& TUnicodeSet::Add(const TUnicodeSet& s) {
+     if (!Empty()) {
+         for (size_t upper = 1; upper < s.Length; upper += 2) {
+             // Stored upper bounds are exclusive, Add() takes an inclusive one
+             Add(s.Ranges[upper - 1], s.Ranges[upper] - 1);
+         }
+         return *this;
+     }
+     TUnicodeSet::operator=(s);
+     return *this;
+ }
+
+ // Adds every symbol of the wide string 's' to this set.
+ // Symbols are decoded with ReadSymbolAndAdvance().
+ TUnicodeSet& TUnicodeSet::Add(const TWtringBuf& s) {
+     const wchar16* cursor = s.data();
+     const wchar16* const stop = s.data() + s.size();
+     for (; cursor < stop;) {
+         Add(ReadSymbolAndAdvance(cursor, stop));
+     }
+     return *this;
+ }
+
+ // Adds a single code point to the set. Values above the valid range are
+ // clamped by NPrivate::Bound(). Adjacent ranges are merged.
+ TUnicodeSet& TUnicodeSet::Add(wchar32 c) {
+     c = NPrivate::Bound(c);
+     const size_t i = GetRangeItem(c);
+     if (i & 1) {
+         // Odd index: 'c' is already inside the range {Ranges[i-1], Ranges[i]}
+         return *this;
+     }
+     if (c == Ranges[i] - 1) { // The char adjoins with the next range
+         if (i > 0 && Ranges[i - 1] == c) { // The char adjoins with the previous range too
+             if (i + 1 == Length) { // Don't delete the last TERMINAL
+                 EraseRangeSlots(i - 1, 1);
+             } else {
+                 EraseRangeSlots(i - 1, 2); // Collapse ranges
+             }
+         } else if (i + 1 == Length) {
+             // Ranges[i] is the TERMINAL, which must stay the last slot.
+             // Insert the lower bound in front of it instead of overwriting
+             // it, so the new range is closed by the TERMINAL itself
+             // (same handling as in Add(from, to)).
+             *InsertRangeSlots(i, 1) = c;
+         } else {
+             // Extend the next range downwards by one
+             EnsureWritable()[i] = c;
+         }
+     } else if (i > 0 && Ranges[i - 1] == c) {
+         // Extend the previous range upwards by one
+         ++(EnsureWritable()[i - 1]);
+     } else {
+         // Isolated char: insert a new one-symbol range [c, c + 1)
+         wchar32* target = InsertRangeSlots(i, 2);
+         *target++ = c;
+         *target = c + 1;
+     }
+     Y_ASSERT(Valid());
+
+     return *this;
+ }
+
+ // Adds the inclusive range [from, to] to the set. Both bounds are clamped
+ // by NPrivate::Bound(). Ranges that are covered by or adjacent to the new
+ // one are merged with it. A reversed range (from > to) trips the assertion
+ // and is otherwise ignored.
+ TUnicodeSet& TUnicodeSet::Add(wchar32 from, wchar32 to) {
+ from = NPrivate::Bound(from);
+ to = NPrivate::Bound(to);
+ Y_ASSERT(from <= to);
+ if (to == from) {
+ return Add(to);
+ } else if (from > to) {
+ return *this;
+ }
+
+ // i - the first slot that is greater than 'from'
+ size_t i = GetRangeItem(from);
+
+ // Case 1: the whole new range lies before slot i
+ if (to < Ranges[i]) {
+ if (i & 1) {
+ // Odd index: the new range is inside an existing one
+ return *this;
+ }
+ if (i > 0 && Ranges[i - 1] == from) {
+ // The new range starts right after the previous range
+ if (Ranges[i] == to + 1) {
+ // ... and ends right before the next one: join them,
+ // but never erase the last TERMINAL
+ if (i + 1 == Length) {
+ EraseRangeSlots(i - 1, 1);
+ } else {
+ EraseRangeSlots(i - 1, 2);
+ }
+ } else {
+ EnsureWritable()[i - 1] = to + 1;
+ }
+ } else if (Ranges[i] == to + 1) {
+ // The new range ends right before slot i
+ if (i + 1 == Length) {
+ // Slot i is the TERMINAL: keep it and insert the lower bound before it
+ *InsertRangeSlots(i, 1) = from;
+ } else {
+ EnsureWritable()[i] = from;
+ }
+ } else {
+ // No neighbours: insert a separate range
+ wchar32* target = InsertRangeSlots(i, 2);
+ *target++ = from;
+ *target = to + 1;
+ }
+ Y_ASSERT(Valid());
+ return *this;
+ }
+
+ // Case 2: the new range crosses at least one slot.
+ // j - the first slot that is greater than 'to'
+ size_t j = GetRangeItem(to, i);
+ Y_ASSERT(i < j);
+
+ if (0 == (j & 1)) { // 'to' falls between ranges
+ if (Ranges[j] > to + 1) {
+ *InsertRangeSlots(j, 1) = to + 1;
+ } else if (j + 1 < Length) { // Exclude last TERMINAL element
+ Y_ASSERT(Ranges[j] == to + 1);
+ // The next range adjoins with the current one. Join them
+ ++j;
+ }
+ }
+
+ if (0 == (i & 1)) { // 'from' falls between ranges
+ if (i > 0 && Ranges[i - 1] == from) {
+ // The previous range adjoins with the new one: its upper bound is erased too
+ --i;
+ } else {
+ *InsertRangeSlots(i, 1) = from;
+ // Skip the slot just inserted; j shifts together with the buffer tail
+ ++i;
+ ++j;
+ }
+ }
+
+ // Erase ranges, which are covered by the new one
+ Y_ASSERT(i <= j);
+ Y_ASSERT(i <= Length);
+ Y_ASSERT(j <= Length);
+ EraseRangeSlots(i, j - i);
+
+ Y_ASSERT(Valid());
+ return *this;
+ }
+
+ // Adds all code points of the predefined category 'c'.
+ // Throws yexception if 'c' is not a valid category id.
+ TUnicodeSet& TUnicodeSet::Add(WC_TYPE c) {
+     NPrivate::CheckWcType(c);
+     if (!Empty()) {
+         AddPredefRanges(NPrivate::GetCategoryRanges(c));
+         return *this;
+     }
+     // An empty set can reference the predefined table directly
+     return Set(c);
+ }
+
+ // Adds all code points of the category with the given name
+ TUnicodeSet& TUnicodeSet::AddCategory(const TStringBuf& catName) {
+     if (!Empty()) {
+         AddPredefRanges(NPrivate::GetCategoryRanges(catName));
+         return *this;
+     }
+     // An empty set can reference the predefined table directly
+     return SetCategory(catName);
+ }
+
+ // Replaces the content of the set with a predefined range table.
+ // The table is referenced, not copied: Capacity is set to zero, which marks
+ // the set as pointing to static data. A table without slots yields an empty set.
+ void TUnicodeSet::SetPredefRanges(const NPrivate::TCategoryRanges& ranges) {
+     Clear();
+     if (!(ranges.Count > 0)) {
+         return;
+     }
+     DynBuffer.Drop();
+     Capacity = 0;
+     Length = ranges.Count;
+     Ranges = ranges.Data;
+ }
+
+ // Makes this set equal to 's'.
+ // Static data and dynamic buffers are shared with 's' (no copying);
+ // a set that lives in the short in-object buffer is copied slot by slot.
+ TUnicodeSet& TUnicodeSet::Set(const TUnicodeSet& s) {
+     if (this == &s) {
+         // Self-assignment: nothing to do
+         return *this;
+     }
+     if (0 == s.Capacity) {
+         // 's' references static data: reference the same data
+         DynBuffer.Drop();
+         Ranges = s.Ranges;
+         Length = s.Length;
+         Capacity = 0;
+     } else if (s.Ranges == s.DynBuffer.Get()) {
+         // 's' uses a dynamic buffer: share it
+         DynBuffer = s.DynBuffer;
+         Ranges = DynBuffer.Get();
+         Length = s.Length;
+         Capacity = s.Capacity;
+     } else {
+         // 's' uses its short buffer: copy the slots.
+         // Clear() first detaches this set from static or shared storage and
+         // resets Length, so that EnsureCapacity() returns a private buffer
+         // and does not carry over (possibly many more) old slots.
+         Clear();
+         ::Copy(s.Ranges, s.Ranges + s.Length, EnsureCapacity(s.Length));
+         Length = s.Length;
+     }
+     return *this;
+ }
+
+ // Replaces the content of the set with the inclusive range [from, to].
+ // Both bounds are clamped by NPrivate::Bound(). A reversed range trips the
+ // assertion and otherwise leaves the set empty.
+ TUnicodeSet& TUnicodeSet::Set(wchar32 from, wchar32 to) {
+     from = NPrivate::Bound(from);
+     to = NPrivate::Bound(to);
+     Y_ASSERT(from <= to);
+
+     Clear();
+
+     if (from > to) {
+         return *this;
+     }
+     if (from == to) {
+         return Add(to);
+     }
+
+     if (to + 1 == CODEPOINT_HIGH) {
+         // The range reaches the top: the terminal slot serves as its upper bound
+         *InsertRangeSlots(0, 1) = from;
+     } else {
+         wchar32* const slots = InsertRangeSlots(0, 2);
+         slots[0] = from;
+         slots[1] = to + 1;
+     }
+     Y_ASSERT(Valid());
+     return *this;
+ }
+
+ // Replaces the content of the set with the symbols of the wide string 's'
+ TUnicodeSet& TUnicodeSet::Set(const TWtringBuf& s) {
+     Clear();
+     Add(s);
+     return *this;
+ }
+
+ // Replaces the content of the set with the predefined category 'c'.
+ // Throws yexception if 'c' is not a valid category id.
+ TUnicodeSet& TUnicodeSet::Set(WC_TYPE c) {
+     NPrivate::CheckWcType(c);
+     const NPrivate::TCategoryRanges& predefined = NPrivate::GetCategoryRanges(c);
+     SetPredefRanges(predefined);
+     return *this;
+ }
+
+ // Replaces the content of the set with the category of the given name
+ TUnicodeSet& TUnicodeSet::SetCategory(const TStringBuf& catName) {
+     const NPrivate::TCategoryRanges& predefined = NPrivate::GetCategoryRanges(catName);
+     SetPredefRanges(predefined);
+     return *this;
+ }
+
+ // Replaces the set with its complement.
+ // Toggling a boundary at zero flips the membership of every code point:
+ // a leading zero slot is removed if present and inserted otherwise.
+ TUnicodeSet& TUnicodeSet::Invert() {
+     Y_ASSERT(Valid());
+     const bool startsAtZero = (0 == Ranges[0]);
+     if (!startsAtZero) {
+         *InsertRangeSlots(0, 1) = 0;
+         return *this;
+     }
+     EraseRangeSlots(0, 1);
+     return *this;
+ }
+
+ // Extends the set with the lower, upper and title case variants of every
+ // code point it currently contains. Variants are taken from the character
+ // property table as offsets relative to the code point.
+ TUnicodeSet& TUnicodeSet::MakeCaseInsensitive() {
+     // Iterate over a snapshot: Add() modifies the live buffer
+     const TVector<wchar32> snapshot(Ranges, Ranges + Length);
+     for (size_t upper = 1; upper < snapshot.size(); upper += 2) {
+         for (wchar32 ch = snapshot[upper - 1]; ch < snapshot[upper]; ++ch) {
+             const ::NUnicode::NPrivate::TProperty& prop = ::NUnicode::NPrivate::CharProperty(ch);
+             if (prop.Lower) {
+                 Add(static_cast<wchar32>(ch + prop.Lower));
+             }
+             if (prop.Upper) {
+                 Add(static_cast<wchar32>(ch + prop.Upper));
+             }
+             if (prop.Title) {
+                 Add(static_cast<wchar32>(ch + prop.Title));
+             }
+         }
+     }
+     return *this;
+ }
+
+ // Makes the set empty: a single terminal slot.
+ // Static or shared storage is abandoned in favour of the short in-object
+ // buffer; an exclusively owned buffer is kept and reused.
+ TUnicodeSet& TUnicodeSet::Clear() {
+     const bool ownsBuffer = !(IsStatic() || IsShared());
+     if (ownsBuffer) {
+         const_cast<wchar32*>(Ranges)[0] = CODEPOINT_HIGH;
+     } else {
+         DynBuffer.Drop();
+         ShortBuffer[0] = CODEPOINT_HIGH;
+         Capacity = Y_ARRAY_SIZE(ShortBuffer);
+         Ranges = ShortBuffer;
+     }
+     Length = 1;
+     return *this;
+ }
+
+ // Returns a hash computed over all slots (terminal slot included)
+ size_t TUnicodeSet::Hash() const {
+     size_t digest = 0;
+     const wchar32* const stop = Ranges + Length;
+     for (const wchar32* slot = Ranges; slot != stop; ++slot) {
+         digest = ::CombineHashes(size_t(*slot), digest);
+     }
+     return digest;
+ }
+
+ // Writes one code point to 'out'.
+ // Graphic characters are written as UTF-8. Everything else, as well as
+ // '-', '\' and '^' and any character when 'needEscape' is set, is written
+ // as a hex escape: \xHH, \uHHHH or \UHHHHHHHH depending on the value.
+ inline void WriteUnicodeChar(IOutputStream& out, wchar32 c, bool needEscape = false) {
+     if (c == wchar32('-') || c == wchar32('\\') || c == wchar32('^')) {
+         needEscape = true;
+     }
+
+     if (needEscape || !::IsGraph(c)) {
+         TString hexRepr = IntToString<16>(c);
+         if ((c >> 8) == 0) {
+             out << "\\x" << LeftPad(hexRepr, 2, '0');
+         } else if ((c >> 16) == 0) {
+             out << "\\u" << LeftPad(hexRepr, 4, '0');
+         } else {
+             out << "\\U" << LeftPad(hexRepr, 8, '0');
+         }
+         return;
+     }
+
+     char buf[4]; // Max utf8 char length is 4
+     size_t wr = 0;
+     WideToUTF8(&c, 1, buf, wr);
+     Y_ASSERT(wr <= Y_ARRAY_SIZE(buf));
+     out.Write(buf, wr);
+ }
+
+ // Returns the textual form of the set: '[' + ranges + ']'.
+ // A one-symbol range is written as the symbol, a two-symbol range as both
+ // symbols without a dash, longer ranges as "first-last".
+ // With 'escapeAllChars' every symbol is written as a hex escape.
+ TString TUnicodeSet::ToString(bool escapeAllChars /* = false*/) const {
+     Y_ASSERT(Valid());
+     TStringStream str;
+     str.Reserve(Length * 4 + Length / 2 + 2);
+
+     str.Write('[');
+     for (size_t upper = 1; upper < Length; upper += 2) {
+         const wchar32 first = Ranges[upper - 1];
+         const wchar32 end = Ranges[upper]; // exclusive
+         WriteUnicodeChar(str, first, escapeAllChars);
+         if (first + 1 < end) {
+             // Don't write dash for two-symbol ranges
+             if (first + 2 < end) {
+                 str.Write('-');
+             }
+             WriteUnicodeChar(str, end - 1, escapeAllChars);
+         }
+     }
+     str.Write(']');
+
+     return str.Str();
+ }
+
+ // Serializes the set: the number of slots followed by the slots themselves
+ void TUnicodeSet::Save(IOutputStream* out) const {
+     const size_t slotCount = Length;
+     ::SaveSize(out, slotCount);
+     ::SaveArray(out, Ranges, slotCount);
+ }
+
+ // Deserializes the set written by Save(), replacing the current content.
+ // Throws TSerializeException if the loaded data is not a valid set; the set
+ // is left empty in that case.
+ void TUnicodeSet::Load(IInputStream* in) {
+     const size_t length = ::LoadSize(in);
+     // Detach from static or shared storage and drop the old slots before
+     // asking for the buffer: the data is loaded straight into it, so it must
+     // be private, and there is no point in carrying old content over.
+     Clear();
+     if (length > 0) {
+         ::LoadArray(in, EnsureCapacity(length), length);
+     }
+     Length = length;
+     if (!Valid()) {
+         // Don't leave a broken set behind
+         Clear();
+         ythrow TSerializeException() << "Loaded broken unicode set";
+     }
+ }
+
+ // Replaces the content of the set with the result of parsing 'data'.
+ // Parsing itself is done by NPrivate::ParseUnicodeSet().
+ TUnicodeSet& TUnicodeSet::Parse(const TWtringBuf& data) {
+     TUnicodeSet& self = Clear();
+     NPrivate::ParseUnicodeSet(self, data);
+     return self;
+ }
+
+}
diff --git a/library/cpp/unicode/set/unicode_set.h b/library/cpp/unicode/set/unicode_set.h
new file mode 100644
index 0000000000..e573e05143
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set.h
@@ -0,0 +1,154 @@
+#pragma once
+
+#include <util/str_stl.h>
+#include <util/charset/unidata.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/utility.h>
+#include <util/generic/vector.h>
+
+class IInputStream;
+class IOutputStream;
+
+namespace NUnicode {
+ namespace NPrivate {
+ struct TCategoryRanges;
+ }
+
+ class TUnicodeSet { // Set of code points kept as an array of wchar32 slots; a valid set always ends with a CODEPOINT_HIGH slot (see Valid())
+ private:
+ typedef TSimpleSharedPtr<wchar32, TDeleteArray> TDynamicBuffer;
+
+ // Ranges can point to:
+ // 1) ShortBuffer for short sets (not more than 2 ranges)
+ // 2) static data (for predefined unicode categories)
+ // 3) or DynBuffer for big sets
+ const wchar32* Ranges;
+ wchar32 ShortBuffer[5];
+ TDynamicBuffer DynBuffer; // Can be shared between multiple sets
+ size_t Length; // Number of slots in Ranges
+ size_t Capacity; // Capacity of currently used buffer. Zero value means reference to static data
+
+ private:
+ Y_FORCE_INLINE bool IsShared() const { // True when Ranges is the dynamic buffer and another owner also holds it
+ return Ranges == DynBuffer.Get() && DynBuffer.RefCount() > 1;
+ }
+
+ Y_FORCE_INLINE bool IsStatic() const {
+ return 0 == Capacity;
+ }
+
+ size_t GetRangeItem(wchar32 c, size_t from = 0) const; // Has() treats an odd result as "c belongs to the set"
+
+ // Extends buffer capacity if required and returns pointer to the writable buffer of slots
+ wchar32* EnsureCapacity(size_t capacity);
+
+ // Makes the copy of buffer if the unicode set points to the static or shared data, and returns pointer to the writable buffer of slots
+ wchar32* EnsureWritable() {
+ if (IsShared()) {
+ // If multiple UnicodeSets refer to the same buffer then make the copy
+ Capacity = 0;
+ }
+ if (IsStatic()) {
+ // Copy static or shared data to own buffer before modifying
+ return EnsureCapacity(Length);
+ }
+ return const_cast<wchar32*>(Ranges); // The buffer is owned exclusively here, so writing through it is allowed
+ }
+
+ // Returns pointer to the first inserted slot
+ wchar32* InsertRangeSlots(const size_t pos, const size_t count);
+ void EraseRangeSlots(const size_t pos, const size_t count);
+
+ void AddPredefRanges(const NPrivate::TCategoryRanges& ranges);
+ void SetPredefRanges(const NPrivate::TCategoryRanges& ranges);
+
+ public:
+ enum {
+ CODEPOINT_HIGH = 0x110000 // Next value after maximum valid code point
+ };
+
+ TUnicodeSet();
+ TUnicodeSet(const TUnicodeSet& s);
+ // Unicode set for specific character range. "from", "to" are inclusive
+ TUnicodeSet(wchar32 from, wchar32 to);
+ // Unicode set consists of all characters from the specified string
+ TUnicodeSet(const TWtringBuf& s);
+ // Unicode set for predefined category
+ TUnicodeSet(WC_TYPE c);
+
+ TUnicodeSet& operator=(const TUnicodeSet& s) {
+ return Set(s);
+ }
+
+ inline bool operator==(const TUnicodeSet& s) const { // Equal when the lengths match and the slots are the very same buffer or compare equal
+ return Length == s.Length && (Ranges == s.Ranges || ::Equal(Ranges, Ranges + Length, s.Ranges));
+ }
+
+ friend inline TUnicodeSet operator~(TUnicodeSet s) { // Takes s by value, so the caller's set is left untouched
+ return s.Invert();
+ }
+
+ friend inline TUnicodeSet operator+(const TUnicodeSet& s1, const TUnicodeSet& s2) { // Returns a copy of s1 with s2 added to it
+ return TUnicodeSet(s1).Add(s2);
+ }
+
+ TUnicodeSet& Add(const TUnicodeSet& s);
+ TUnicodeSet& Add(const TWtringBuf& s);
+ TUnicodeSet& Add(wchar32 c);
+ // from, to - inclusive
+ TUnicodeSet& Add(wchar32 from, wchar32 to);
+ TUnicodeSet& Add(WC_TYPE c);
+ // Add unicode category by name (one- or two-letter)
+ TUnicodeSet& AddCategory(const TStringBuf& catName);
+
+ TUnicodeSet& Set(const TUnicodeSet& s);
+ // from, to - inclusive
+ TUnicodeSet& Set(wchar32 from, wchar32 to);
+ TUnicodeSet& Set(const TWtringBuf& s);
+ TUnicodeSet& Set(WC_TYPE c);
+ TUnicodeSet& SetCategory(const TStringBuf& catName);
+
+ TUnicodeSet& Invert();
+ // Converts existing unicode set to the case-insensitive set
+ TUnicodeSet& MakeCaseInsensitive();
+ TUnicodeSet& Clear();
+
+ size_t Hash() const;
+ TString ToString(bool escapeAllChars = false) const;
+
+ inline bool Valid() const { // A usable set has at least one slot and its last slot is CODEPOINT_HIGH
+ return Length > 0 && Ranges[Length - 1] == CODEPOINT_HIGH;
+ }
+
+ inline bool Has(wchar32 c) const {
+ if (Y_UNLIKELY(c >= CODEPOINT_HIGH)) { // Values past the last valid code point are never members
+ return false;
+ }
+ const size_t item = GetRangeItem(c);
+ return (item & 1);
+ }
+
+ inline bool Empty() const {
+ Y_ASSERT(Valid());
+ return Length < 2; // For a valid set this means only the terminating CODEPOINT_HIGH slot is present
+ }
+
+ void Save(IOutputStream* out) const;
+ void Load(IInputStream* in);
+
+ TUnicodeSet& Parse(const TWtringBuf& data);
+ };
+
+ using TUnicodeSetPtr = TSimpleSharedPtr<TUnicodeSet>;
+
+}
+
+template <>
+struct THash<NUnicode::TUnicodeSet> { // THash specialization that forwards to TUnicodeSet::Hash()
+ size_t operator()(const NUnicode::TUnicodeSet& s) const {
+ return s.Hash();
+ }
+};
diff --git a/library/cpp/unicode/set/unicode_set_lexer.h b/library/cpp/unicode/set/unicode_set_lexer.h
new file mode 100644
index 0000000000..c584f317fc
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_lexer.h
@@ -0,0 +1,49 @@
+#pragma once
+
+#include "unicode_set_token.h"
+
+#include <util/generic/strbuf.h>
+#include <util/system/yassert.h>
+
+namespace NUnicode {
+ namespace NPrivate {
+ // Tokenizer for the textual unicode-set syntax. GetToken() returns the type of the next token,
+ // GetLastToken() exposes its payload, and PushBack() makes the next GetToken() return the same token again.
+ class TUnicodeSetLexer {
+ private:
+     // Held by value: TWtringBuf is itself a non-owning view, so a copy is cheap, while a reference
+     // to the caller's view object would dangle if that object were a temporary.
+     // The characters the view points to must still outlive the lexer.
+     const TWtringBuf Data;
+
+     // State of the Ragel-generated scanner (used by the "write init" / "write exec" sections)
+     int cs;
+     //int* stack;
+     //int top;
+     int act;
+     const wchar16* ts;
+     const wchar16* te;
+     const wchar16* p;
+     const wchar16* pe;
+     const wchar16* eof;
+
+     TUnicodeSetToken LastToken; // Most recently produced token
+     bool UseLast;               // Set by PushBack(): the next GetToken() re-delivers LastToken
+
+ private:
+     // Each overload records the produced token in LastToken and returns its type
+     EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type);
+     EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type, wchar16 symbol);
+     EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize);
+     void Reset();
+
+ public:
+     explicit TUnicodeSetLexer(const TWtringBuf& data);
+
+     EUnicodeSetTokenType GetToken();
+
+     const TUnicodeSetToken& GetLastToken() const {
+         return LastToken;
+     }
+
+     // Only one token of look-behind is kept, so a second PushBack() without a GetToken() in between is a hard error
+     inline void PushBack() {
+         Y_VERIFY(!UseLast, "Double TUnicodeSetLexer::PushBack()");
+         UseLast = true;
+     }
+ };
+
+ }
+}
diff --git a/library/cpp/unicode/set/unicode_set_lexer.rl6 b/library/cpp/unicode/set/unicode_set_lexer.rl6
new file mode 100644
index 0000000000..ebbc131556
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_lexer.rl6
@@ -0,0 +1,125 @@
+#include <library/cpp/unicode/set/unicode_set_lexer.h>
+
+#include <util/generic/yexception.h>
+
+namespace NUnicode {
+namespace NPrivate {
+
+%%{
+ machine unicode_set_lexer;
+
+ alphtype unsigned short;
+
+ action IncorrectCategoryError {
+ throw yexception() << "incorrect category";
+ }
+
+ action IncorrectEscapedCodepointError {
+ throw yexception() << "incorrect escaped codepoint";
+ }
+
+ action IncorrectQuotedPairError {
+ throw yexception() << "incorrect quoted pair";
+ }
+
+ id = alpha (alnum | '_')*;
+ escape = [%\\];
+
+ category = (':' id ':') <>^IncorrectCategoryError;
+ xdigit8 = xdigit{8} @^IncorrectEscapedCodepointError;
+ xdigit4 = xdigit{4} @^IncorrectEscapedCodepointError;
+ xdigit2 = xdigit{2} @^IncorrectEscapedCodepointError;
+ symbol = any @^IncorrectQuotedPairError;
+
+ main := |*
+ '^' => {
+ return YieldToken(USTT_NEGATION);
+ };
+ '-' => {
+ return YieldToken(USTT_RANGE);
+ };
+ '[' => {
+ return YieldToken(USTT_LBRACKET);
+ };
+ ']' => {
+ return YieldToken(USTT_RBRACKET);
+ };
+ category => {
+ return YieldToken(USTT_CATEGORY, ts + 1, te - ts -2); // payload is the match without its first and last character (the two colons)
+ };
+ escape 'U' xdigit8 => {
+ return YieldToken(USTT_CODEPOINT32, ts + 2, 8); // payload is the 8 hex digits after the two-character prefix
+ };
+ escape 'u' xdigit4 => {
+ return YieldToken(USTT_CODEPOINT16, ts + 2, 4); // payload is the 4 hex digits after the two-character prefix
+ };
+ escape 'x' xdigit2 => {
+ return YieldToken(USTT_CODEPOINT8, ts + 2, 2); // payload is the 2 hex digits after the two-character prefix
+ };
+ escape symbol => {
+ return YieldToken(USTT_QUOTED_PAIR, *(ts + 1)); // payload is the character that follows the escape introducer
+ };
+ any => {
+ return YieldToken(USTT_SYMBOL, *ts); // any other single character is reported as a plain symbol
+ };
+ *|;
+
+}%%
+
+namespace {
+
+%% write data;
+
+}
+
+// Sets the scanner up to read data from its first character to its end.
+// The generated "write init" section then initializes the scanner state variables.
+TUnicodeSetLexer::TUnicodeSetLexer(const TWtringBuf& data)
+    : Data(data)
+    , cs(0)
+    , act(0)
+    , ts(nullptr)
+    , te(nullptr)
+    , p(Data.data())
+    , pe(Data.data() + Data.size())
+    , eof(pe)
+    , UseLast(false)
+{
+    %% write init;
+}
+
+EUnicodeSetTokenType TUnicodeSetLexer::GetToken() { // Returns the type of the next token; its payload is available through GetLastToken()
+ if (UseLast) { // A token was pushed back: deliver the stored one again instead of scanning
+ UseLast = false;
+ return LastToken.Type;
+ }
+
+ %% write exec;
+
+ return YieldToken(USTT_EOS); // Reached only when the generated scanner code above did not return a token
+}
+
+EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type) { // Token without payload
+ Reset(); // Moves p to te and clears ts/te before the token is recorded
+ LastToken = TUnicodeSetToken(type);
+ return type;
+}
+
+EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, wchar16 symbol) { // Token carrying a single character
+ Reset();
+ LastToken = TUnicodeSetToken(type, symbol);
+ return type;
+}
+
+EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize) { // Token carrying a view of dataSize characters starting at dataBegin
+ Reset();
+ LastToken = TUnicodeSetToken(type, dataBegin, dataSize);
+ return type;
+}
+
+// Called after a token has been matched: continues scanning from te and forgets the token bounds
+void TUnicodeSetLexer::Reset() {
+    p = te;
+    ts = nullptr;
+    te = nullptr;
+}
+
+} // NPrivate
+} // NUnicode
diff --git a/library/cpp/unicode/set/unicode_set_parser.cpp b/library/cpp/unicode/set/unicode_set_parser.cpp
new file mode 100644
index 0000000000..754c7add41
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_parser.cpp
@@ -0,0 +1,109 @@
+#include "quoted_pair.h"
+#include "unicode_set_lexer.h"
+
+#include <util/string/cast.h>
+#include <util/charset/wide.h>
+
+namespace NUnicode {
+ namespace NPrivate {
+#define UNEXPECTED_TOKEN throw yexception() << "Unexpected token: " << lexer.GetLastToken()
+
+#define EXPECT_TOKEN(type) \
+ if (lexer.GetToken() != type) { \
+ UNEXPECTED_TOKEN; \
+ }
+
+ void ParseUnicodeSet(TUnicodeSet& set, TUnicodeSetLexer& lexer);
+
+ // Parses the items of a bracketed set up to the closing bracket and adds them to `set`.
+ // Items are single characters (plain symbol, '^', escaped code point, quoted pair),
+ // "from-to" ranges and nested bracketed sets. The closing bracket is pushed back for the caller.
+ // Throws yexception on an unexpected token, on a code point that is not below CODEPOINT_HIGH,
+ // and on a range whose start is not below its end.
+ void ParseCharSequence(TUnicodeSet& set, TUnicodeSetLexer& lexer) {
+     // prevChar / curChar are meaningful only while the matching has* flag is set. A separate flag,
+     // instead of zero meaning "no character", lets U+0000 be a set member and a range start.
+     wchar32 prevChar = 0;
+     bool hasPrev = false;
+     bool range = false;
+     for (EUnicodeSetTokenType type = lexer.GetToken(); type != USTT_RBRACKET; type = lexer.GetToken()) {
+         wchar32 curChar = 0;
+         bool hasCur = false;
+         switch (type) {
+             case USTT_SYMBOL:
+                 curChar = lexer.GetLastToken().Symbol;
+                 hasCur = true;
+                 break;
+             case USTT_NEGATION:
+                 // Past the first position '^' is an ordinary character
+                 curChar = '^';
+                 hasCur = true;
+                 break;
+             case USTT_QUOTED_PAIR:
+                 ResolveUnicodeQuotedPair(lexer.GetLastToken().Symbol, curChar, set);
+                 // NOTE(review): the helper is not visible here. As before, a zero curChar is taken to
+                 // mean "no single character" (presumably it added a whole class to set) - confirm.
+                 hasCur = (curChar != 0);
+                 break;
+             case USTT_CODEPOINT8:
+             case USTT_CODEPOINT16:
+             case USTT_CODEPOINT32:
+                 curChar = IntFromString<ui32, 16>(lexer.GetLastToken().Data);
+                 if (curChar >= TUnicodeSet::CODEPOINT_HIGH) {
+                     throw yexception() << "Invalid unicode codepoint: " << lexer.GetLastToken();
+                 }
+                 hasCur = true;
+                 break;
+             case USTT_RANGE:
+                 // A dash needs a single character right before it
+                 if (!hasPrev) {
+                     UNEXPECTED_TOKEN;
+                 }
+                 range = true;
+                 continue;
+             case USTT_LBRACKET: {
+                 // Nested set: hand the bracket back and let the set parser consume the whole group
+                 lexer.PushBack();
+                 TUnicodeSet inner;
+                 ParseUnicodeSet(inner, lexer);
+                 set.Add(inner);
+                 break;
+             }
+             default:
+                 UNEXPECTED_TOKEN;
+         }
+         if (hasCur) {
+             if (range) {
+                 if (prevChar >= curChar) {
+                     throw yexception() << "Invalid character range";
+                 }
+                 set.Add(prevChar, curChar);
+                 // The end of a range cannot also start the next range
+                 hasCur = false;
+             } else {
+                 set.Add(curChar);
+             }
+         } else if (range) {
+             // A dash followed by something that is not a single character
+             UNEXPECTED_TOKEN;
+         }
+         range = false;
+         prevChar = curChar;
+         hasPrev = hasCur;
+     }
+     if (range) {
+         // A dash directly before the closing bracket
+         UNEXPECTED_TOKEN;
+     }
+     lexer.PushBack();
+ }
+
+ // Parses one bracketed group: '[', an optional '^', then either a single category token
+ // or a character sequence, then ']'. The parsed items are added to `set`; when the '^'
+ // was present the resulting set is inverted at the end.
+ void ParseUnicodeSet(TUnicodeSet& set, TUnicodeSetLexer& lexer) {
+     EXPECT_TOKEN(USTT_LBRACKET);
+
+     // Look at the token after the bracket; hand it back unless it is the negation mark
+     const bool invert = (lexer.GetToken() == USTT_NEGATION);
+     if (!invert) {
+         lexer.PushBack();
+     }
+
+     if (lexer.GetToken() != USTT_CATEGORY) {
+         lexer.PushBack();
+         ParseCharSequence(set, lexer);
+     } else {
+         set.AddCategory(WideToUTF8(lexer.GetLastToken().Data));
+     }
+
+     EXPECT_TOKEN(USTT_RBRACKET);
+
+     if (invert) {
+         set.Invert();
+     }
+ }
+
+ void ParseUnicodeSet(TUnicodeSet& set, const TWtringBuf& data) { // Entry point: parses the whole of data as exactly one bracketed set and adds it to set
+ TUnicodeSetLexer lexer(data);
+ ParseUnicodeSet(set, lexer);
+ EXPECT_TOKEN(USTT_EOS); // Anything left after the closing bracket is an error
+ }
+
+ } // NPrivate
+}
diff --git a/library/cpp/unicode/set/unicode_set_parser.h b/library/cpp/unicode/set/unicode_set_parser.h
new file mode 100644
index 0000000000..64516ae23a
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_parser.h
@@ -0,0 +1,11 @@
+#pragma once
+
+#include "unicode_set.h"
+
+#include <util/generic/strbuf.h>
+
+namespace NUnicode {
+ namespace NPrivate {
+ void ParseUnicodeSet(TUnicodeSet& set, const TWtringBuf& data);
+ }
+}
diff --git a/library/cpp/unicode/set/unicode_set_token.cpp b/library/cpp/unicode/set/unicode_set_token.cpp
new file mode 100644
index 0000000000..525e3dea29
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_token.cpp
@@ -0,0 +1 @@
+#include "unicode_set_token.h"
diff --git a/library/cpp/unicode/set/unicode_set_token.h b/library/cpp/unicode/set/unicode_set_token.h
new file mode 100644
index 0000000000..6fa619f757
--- /dev/null
+++ b/library/cpp/unicode/set/unicode_set_token.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/stream/output.h>
+
+namespace NUnicode {
+ namespace NPrivate {
+ enum EUnicodeSetTokenType { // Token kinds; NOTE(review): the quoted strings in the comments below are presumably the printed names used by generated enum serialization - confirm before editing them
+ USTT_EOS /* "eos" */,
+ USTT_SYMBOL /* "symbol" */,
+ USTT_QUOTED_PAIR /* "quoted-pair" */,
+ USTT_CODEPOINT8 /* "codepoint8" */,
+ USTT_CODEPOINT16 /* "codepoint16" */,
+ USTT_CODEPOINT32 /* "codepoint32" */,
+ USTT_CATEGORY /* "category" */,
+ USTT_NEGATION /* "negation" */,
+ USTT_RANGE /* "range" */,
+ USTT_LBRACKET /* "lbracket" */,
+ USTT_RBRACKET /* "rbracket" */,
+ };
+
+ // One lexer token: its type plus an optional payload, which is either a single
+ // character (Symbol) or a non-owning view of several characters (Data).
+ struct TUnicodeSetToken {
+     EUnicodeSetTokenType Type = USTT_EOS;
+     wchar16 Symbol = 0;
+     TWtringBuf Data; // Does not own the characters it refers to
+
+     // End-of-stream token without payload
+     explicit TUnicodeSetToken() {
+     }
+
+     // Token of the given type without payload
+     explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType)
+         : Type(tokenType)
+     {
+     }
+
+     // Token carrying a single character
+     explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType, wchar16 symbol)
+         : Type(tokenType)
+         , Symbol(symbol)
+     {
+     }
+
+     // Token carrying a view of dataSize characters starting at dataBegin
+     explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType, const wchar16* dataBegin, size_t dataSize)
+         : Type(tokenType)
+         , Data(dataBegin, dataSize)
+     {
+     }
+ };
+
+ }
+}
+
+Y_DECLARE_OUT_SPEC(inline, NUnicode::NPrivate::TUnicodeSetToken, output, token) { // Prints the token type, then ":" and the quoted payload for whichever of Symbol / Data is set
+ output << token.Type;
+ if (token.Symbol) { // A zero Symbol is treated as "no character payload"
+ output << ":" << TUtf16String(1, token.Symbol).Quote();
+ }
+ if (!token.Data.empty()) {
+ output << ":" << TUtf16String(token.Data).Quote();
+ }
+}
diff --git a/ydb/library/yql/udfs/common/unicode_base/lib/CMakeLists.txt b/ydb/library/yql/udfs/common/unicode_base/lib/CMakeLists.txt
index c896e9ce1a..01b8adc070 100644
--- a/ydb/library/yql/udfs/common/unicode_base/lib/CMakeLists.txt
+++ b/ydb/library/yql/udfs/common/unicode_base/lib/CMakeLists.txt
@@ -19,6 +19,7 @@ target_link_libraries(common-unicode_base-lib PUBLIC
cpp-deprecated-split
cpp-string_utils-levenshtein_diff
cpp-unicode-normalization
+ cpp-unicode-set
yql-public-udf
library-yql-utils
)
diff --git a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
index 947748dbf5..fc7519db1c 100644
--- a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
+++ b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
@@ -6,6 +6,7 @@
#include <library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h>
#include <library/cpp/unicode/normalization/normalization.h>
+#include <library/cpp/unicode/set/unicode_set.h>
#include <library/cpp/deprecated/split/split_iterator.h>
#include <util/string/join.h>
@@ -13,7 +14,9 @@
#include <util/string/split.h>
#include <util/string/subst.h>
#include <util/charset/wide.h>
+#include <util/charset/utf8.h>
#include <util/string/strip.h>
+#include <util/string/ascii.h>
#include <util/charset/unidata.h>
using namespace NYql;
@@ -41,6 +44,16 @@ namespace {
XX(NormalizeNFKD, NFKD) \
XX(NormalizeNFKC, NFKC)
+#define IS_CATEGORY_UDF_MAP(XX) \
+ XX(IsAscii, IsAscii) \
+ XX(IsSpace, IsSpace) \
+ XX(IsUpper, IsUpper) \
+ XX(IsLower, IsLower) \
+ XX(IsDigit, IsDigit) \
+ XX(IsAlpha, IsAlpha) \
+ XX(IsAlnum, IsAlnum) \
+ XX(IsHex, IsHexdigit) // Each entry is (UDF name suffix, predicate that the UDF applies to every code point of its input)
+
#define NORMALIZE_UDF(name, mode) \
SIMPLE_UDF(T##name, TUtf8(TAutoMap<TUtf8>)) { \
const auto& inputRef = args[0].AsStringRef(); \
@@ -49,7 +62,26 @@ namespace {
return valueBuilder->NewString(output); \
}
+// Defines UDF T<udfName>: true when `function` accepts every code point of the UTF-8 input
+// (so also true for an empty input). A code point that fails to decode makes the result false
+// explicitly, instead of relying on `function` rejecting whatever the decoder left in rune.
+#define IS_CATEGORY_UDF(udfName, function)                                                      \
+    SIMPLE_UDF(T##udfName, bool(TAutoMap<TUtf8>)) {                                             \
+        Y_UNUSED(valueBuilder);                                                                 \
+        const TStringBuf input(args[0].AsStringRef());                                          \
+        bool result = true;                                                                     \
+        wchar32 rune;                                                                           \
+        const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin());       \
+        const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end());        \
+        while (cur != last) {                                                                   \
+            if (ReadUTF8CharAndAdvance(rune, cur, last) != RECODE_OK || !function(rune)) {      \
+                result = false;                                                                 \
+                break;                                                                          \
+            }                                                                                   \
+        }                                                                                       \
+        return TUnboxedValuePod(result);                                                        \
+    }
+
NORMALIZE_UDF_MAP(NORMALIZE_UDF)
+ IS_CATEGORY_UDF_MAP(IS_CATEGORY_UDF)
SIMPLE_UDF(TIsUtf, bool(TOptional<char*>)) {
Y_UNUSED(valueBuilder);
@@ -449,9 +481,35 @@ namespace {
return valueBuilder->NewString(WideToUTF8(result));
}
+ // True when every code point of the first argument belongs to the unicode set described by the
+ // second argument (a pattern in the TUnicodeSet::Parse syntax); true for an empty input.
+ // A pattern that fails to parse terminates the UDF with the parser's message.
+ SIMPLE_UDF(TIsUnicodeSet, bool(TAutoMap<TUtf8>, TUtf8)) {
+     Y_UNUSED(valueBuilder);
+     const TStringBuf input(args[0].AsStringRef());
+     const TUtf16String& customCategory = UTF8ToWide(args[1].AsStringRef());
+     TUnicodeSet unicodeSet;
+     try {
+         unicodeSet.Parse(customCategory);
+     } catch (...) {
+         UdfTerminate((TStringBuilder() << "Failed to parse unicode set: " << CurrentExceptionMessage()).c_str());
+     }
+     bool result = true;
+     wchar32 rune;
+     const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin());
+     const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end());
+     while (cur != last) {
+         // Stop on a decoding error rather than testing the placeholder rune: the set may contain
+         // that placeholder, and the cursor may not advance on failure (verify against
+         // util/charset/utf8.h), which would otherwise loop forever.
+         if (ReadUTF8CharAndAdvance(rune, cur, last) != RECODE_OK || !unicodeSet.Has(rune)) {
+             result = false;
+             break;
+         }
+     }
+     return TUnboxedValuePod(result);
+ }
+
#define REGISTER_NORMALIZE_UDF(name, mode) T##name,
+#define REGISTER_IS_CATEGORY_UDF(name, function) T##name,
#define EXPORTED_UNICODE_BASE_UDF \
NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \
+ IS_CATEGORY_UDF_MAP(REGISTER_IS_CATEGORY_UDF) \
TIsUtf, \
TGetLength, \
TSubstring, \
@@ -474,5 +532,6 @@ namespace {
TToTitle, \
TToUint64, \
TTryToUint64, \
- TStrip
+ TStrip, \
+ TIsUnicodeSet
}