diff options
author | amnosov <amnosov@yandex-team.com> | 2022-10-26 11:59:40 +0300 |
---|---|---|
committer | amnosov <amnosov@yandex-team.com> | 2022-10-26 11:59:40 +0300 |
commit | 4225eab76862f099d4d55a0205ab0cdd39c0433c (patch) | |
tree | 842ff268488999a8f54243cfb10ba96fb333645b /library/cpp | |
parent | 2399206380b6eab57bb7b9ad0bf0ecf851c94c1d (diff) | |
download | ydb-4225eab76862f099d4d55a0205ab0cdd39c0433c.tar.gz |
Unicode::Is{Category}
Unicode::Is{Category} udfs added
Diffstat (limited to 'library/cpp')
-rw-r--r-- | library/cpp/unicode/CMakeLists.txt | 1 | ||||
-rw-r--r-- | library/cpp/unicode/set/CMakeLists.txt | 33 | ||||
-rw-r--r-- | library/cpp/unicode/set/category_ranges.h | 18 | ||||
-rw-r--r-- | library/cpp/unicode/set/generated/category_ranges.cpp | 293 | ||||
-rw-r--r-- | library/cpp/unicode/set/quoted_pair.cpp | 53 | ||||
-rw-r--r-- | library/cpp/unicode/set/quoted_pair.h | 15 | ||||
-rw-r--r-- | library/cpp/unicode/set/set.cpp | 6 | ||||
-rw-r--r-- | library/cpp/unicode/set/set.h | 4 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set.cpp | 480 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set.h | 154 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_lexer.h | 49 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_lexer.rl6 | 125 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_parser.cpp | 109 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_parser.h | 11 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_token.cpp | 1 | ||||
-rw-r--r-- | library/cpp/unicode/set/unicode_set_token.h | 68 |
16 files changed, 1420 insertions, 0 deletions
diff --git a/library/cpp/unicode/CMakeLists.txt b/library/cpp/unicode/CMakeLists.txt index 915ed345c1..0b54d5d98d 100644 --- a/library/cpp/unicode/CMakeLists.txt +++ b/library/cpp/unicode/CMakeLists.txt @@ -8,3 +8,4 @@ add_subdirectory(normalization) add_subdirectory(punycode) +add_subdirectory(set) diff --git a/library/cpp/unicode/set/CMakeLists.txt b/library/cpp/unicode/set/CMakeLists.txt new file mode 100644 index 0000000000..44380308ed --- /dev/null +++ b/library/cpp/unicode/set/CMakeLists.txt @@ -0,0 +1,33 @@ + +# This file was gererated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + + +add_library(cpp-unicode-set) +target_link_libraries(cpp-unicode-set PUBLIC + contrib-libs-cxxsupp + yutil + tools-enum_parser-enum_serialization_runtime +) +target_sources(cpp-unicode-set PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/quoted_pair.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_parser.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_token.cpp + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/generated/category_ranges.cpp +) +generate_enum_serilization(cpp-unicode-set + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_token.h + INCLUDE_HEADERS + library/cpp/unicode/set/unicode_set_token.h +) +target_ragel_lexers(cpp-unicode-set + PRIVATE + ${CMAKE_SOURCE_DIR}/library/cpp/unicode/set/unicode_set_lexer.rl6 + -CG2 +) diff --git a/library/cpp/unicode/set/category_ranges.h b/library/cpp/unicode/set/category_ranges.h new file mode 100644 index 0000000000..10256d2e6e --- /dev/null +++ b/library/cpp/unicode/set/category_ranges.h @@ -0,0 +1,18 @@ +#pragma once + +#include <util/charset/unidata.h> +#include <util/system/defaults.h> +#include <util/generic/strbuf.h> + +namespace NUnicode { + namespace NPrivate { + struct TCategoryRanges { + size_t Count; + const wchar32* Data; + }; + + const TCategoryRanges& GetCategoryRanges(WC_TYPE cat); + const TCategoryRanges& GetCategoryRanges(const TStringBuf& category); + + } +} diff --git a/library/cpp/unicode/set/generated/category_ranges.cpp b/library/cpp/unicode/set/generated/category_ranges.cpp new file mode 100644 index 0000000000..44e2430239 --- /dev/null +++ b/library/cpp/unicode/set/generated/category_ranges.cpp @@ -0,0 +1,293 @@ +#include <library/cpp/unicode/set/category_ranges.h> + +#include <util/generic/hash.h> +#include <util/generic/singleton.h> +#include <util/generic/yexception.h> +#include <utility> + +namespace NUnicode { +namespace NPrivate { + +static const wchar32 CAT_C[] = {0, 32, 127, 160, 173, 174, 888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1542, 1564, 1566, 1757, 1758, 1806, 1808, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2274, 2275, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6158, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8204, 8208, 8234, 8239, 8288, 8304, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65532, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69821, 69822, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113824, 118784, 119030, 119040, 119079, 119081, 119155, 119163, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917760, 918000, 1114112}; +static const wchar32 CAT_Cc[] = {0, 32, 127, 160, 1114112}; +static const wchar32 CAT_Cc_ASCII[] = {0, 9, 14, 28, 127, 160, 1114112}; +static const wchar32 CAT_Cc_SEPARATOR[] = {28, 32, 1114112}; +static const wchar32 CAT_Cc_SPACE[] = {9, 14, 1114112}; +static const wchar32 CAT_Cf[] = {173, 174, 1536, 1542, 1564, 1565, 1757, 1758, 1807, 1808, 2274, 2275, 6158, 6159, 8204, 8208, 8234, 8239, 8288, 8293, 8294, 8304, 65279, 65280, 65529, 65532, 69821, 69822, 113824, 113828, 119155, 119163, 917505, 917506, 917536, 917632, 1114112}; +static const wchar32 CAT_Cf_BIDI[] = {8206, 8208, 8234, 8239, 8294, 8296, 8297, 8298, 917544, 917546, 917595, 917596, 917597, 917598, 917627, 917628, 917629, 917630, 1114112}; +static const wchar32 CAT_Cf_FORMAT[] = {173, 174, 1536, 1542, 1564, 1565, 1757, 1758, 1807, 1808, 2274, 2275, 6158, 6159, 8289, 8293, 8296, 8297, 8298, 8304, 65529, 65532, 69821, 69822, 113824, 113828, 119155, 119163, 917505, 917506, 917536, 917544, 917546, 917595, 917596, 917597, 917598, 917627, 917628, 917629, 917630, 917632, 1114112}; +static const wchar32 CAT_Cf_JOIN[] = {8204, 8206, 8288, 8289, 1114112}; +static const wchar32 CAT_Cf_ZWNBSP[] = {65279, 65280, 1114112}; +static const wchar32 CAT_Cn[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112}; +static const wchar32 CAT_Cn_UNASSIGNED[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112}; +static const wchar32 CAT_Co[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112}; +static const wchar32 CAT_Co_PRIVATE[] = {888, 890, 896, 900, 907, 908, 909, 910, 930, 931, 1328, 1329, 1367, 1369, 1376, 1377, 1416, 1417, 1419, 1421, 1424, 1425, 1480, 1488, 1515, 1520, 1525, 1536, 1565, 1566, 1806, 1807, 1867, 1869, 1970, 1984, 2043, 2048, 2094, 2096, 2111, 2112, 2140, 2142, 2143, 2208, 2229, 2230, 2238, 2260, 2436, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2492, 2501, 2503, 2505, 2507, 2511, 2519, 2520, 2524, 2526, 2527, 2532, 2534, 2556, 2561, 2564, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2649, 2653, 2654, 2655, 2662, 2678, 2689, 2692, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2748, 2758, 2759, 2762, 2763, 2766, 2768, 2769, 2784, 2788, 2790, 2802, 2809, 2810, 2817, 2820, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2876, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2908, 2910, 2911, 2916, 2918, 2936, 2946, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3006, 3011, 3014, 3017, 3018, 3022, 3024, 3025, 3031, 3032, 3046, 3067, 3072, 3076, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3160, 3163, 3168, 3172, 3174, 3184, 3192, 3204, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3260, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3294, 3295, 3296, 3300, 3302, 3312, 3313, 3315, 3329, 3332, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3397, 3398, 3401, 3402, 3408, 3412, 3428, 3430, 3456, 3458, 3460, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3558, 3568, 3570, 3573, 3585, 3643, 3647, 3676, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3770, 3771, 3774, 3776, 3781, 3782, 3783, 3784, 3790, 3792, 3802, 3804, 3808, 3840, 3912, 3913, 3949, 3953, 3992, 3993, 4029, 4030, 4045, 4046, 4059, 4096, 4294, 4295, 4296, 4301, 4302, 4304, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4957, 4989, 4992, 5018, 5024, 5110, 5112, 5118, 5120, 5789, 5792, 5881, 5888, 5901, 5902, 5909, 5920, 5943, 5952, 5972, 5984, 5997, 5998, 6001, 6002, 6004, 6016, 6110, 6112, 6122, 6128, 6138, 6144, 6159, 6160, 6170, 6176, 6264, 6272, 6315, 6320, 6390, 6400, 6431, 6432, 6444, 6448, 6460, 6464, 6465, 6468, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6608, 6619, 6622, 6684, 6686, 6751, 6752, 6781, 6783, 6794, 6800, 6810, 6816, 6830, 6832, 6847, 6912, 6988, 6992, 7037, 7040, 7156, 7164, 7224, 7227, 7242, 7245, 7305, 7360, 7368, 7376, 7415, 7416, 7418, 7424, 7670, 7675, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8133, 8134, 8148, 8150, 8156, 8157, 8176, 8178, 8181, 8182, 8191, 8192, 8293, 8294, 8306, 8308, 8335, 8336, 8349, 8352, 8383, 8400, 8433, 8448, 8588, 8592, 9215, 9216, 9255, 9280, 9291, 9312, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11264, 11311, 11312, 11359, 11360, 11508, 11513, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11633, 11647, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11744, 11845, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12288, 12352, 12353, 12439, 12441, 12544, 12549, 12590, 12593, 12687, 12688, 12731, 12736, 12772, 12784, 12831, 12832, 13055, 13056, 19894, 19904, 40918, 40960, 42125, 42128, 42183, 42192, 42540, 42560, 42744, 42752, 42927, 42928, 42936, 42999, 43052, 43056, 43066, 43072, 43128, 43136, 43206, 43214, 43226, 43232, 43262, 43264, 43348, 43359, 43389, 43392, 43470, 43471, 43482, 43486, 43519, 43520, 43575, 43584, 43598, 43600, 43610, 43612, 43715, 43739, 43767, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43878, 43888, 44014, 44016, 44026, 44032, 55204, 55216, 55239, 55243, 55292, 55296, 57344, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64450, 64467, 64832, 64848, 64912, 64914, 64968, 65008, 65022, 65024, 65050, 65056, 65107, 65108, 65127, 65128, 65132, 65136, 65141, 65142, 65277, 65279, 65280, 65281, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65504, 65511, 65512, 65519, 65529, 65534, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 65792, 65795, 65799, 65844, 65847, 65935, 65936, 65948, 65952, 65953, 66000, 66046, 66176, 66205, 66208, 66257, 66272, 66300, 66304, 66340, 66352, 66379, 66384, 66427, 66432, 66462, 66463, 66500, 66504, 66518, 66560, 66718, 66720, 66730, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 66927, 66928, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67671, 67743, 67751, 67760, 67808, 67827, 67828, 67830, 67835, 67868, 67871, 67898, 67903, 67904, 67968, 68024, 68028, 68048, 68050, 68100, 68101, 68103, 68108, 68116, 68117, 68120, 68121, 68148, 68152, 68155, 68159, 68168, 68176, 68185, 68192, 68256, 68288, 68327, 68331, 68343, 68352, 68406, 68409, 68438, 68440, 68467, 68472, 68498, 68505, 68509, 68521, 68528, 68608, 68681, 68736, 68787, 68800, 68851, 68858, 68864, 69216, 69247, 69632, 69710, 69714, 69744, 69759, 69826, 69840, 69865, 69872, 69882, 69888, 69941, 69942, 69956, 69968, 70007, 70016, 70094, 70096, 70112, 70113, 70133, 70144, 70162, 70163, 70207, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70314, 70320, 70379, 70384, 70394, 70400, 70404, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70460, 70469, 70471, 70473, 70475, 70478, 70480, 70481, 70487, 70488, 70493, 70500, 70502, 70509, 70512, 70517, 70656, 70746, 70747, 70748, 70749, 70750, 70784, 70856, 70864, 70874, 71040, 71094, 71096, 71134, 71168, 71237, 71248, 71258, 71264, 71277, 71296, 71352, 71360, 71370, 71424, 71450, 71453, 71468, 71472, 71488, 71840, 71923, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72759, 72760, 72774, 72784, 72813, 72816, 72848, 72850, 72872, 72873, 72887, 73728, 74650, 74752, 74863, 74864, 74869, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92768, 92778, 92782, 92784, 92880, 92910, 92912, 92918, 92928, 92998, 93008, 93018, 93019, 93026, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94079, 94095, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 113820, 113828, 118784, 119030, 119040, 119079, 119081, 119273, 119296, 119366, 119552, 119639, 119648, 119666, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120780, 120782, 121484, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 124928, 125125, 125127, 125143, 125184, 125259, 125264, 125274, 125278, 125280, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127232, 127245, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 917505, 917506, 917536, 917632, 917760, 918000, 1114112}; +static const wchar32 CAT_Cs[] = {55296, 57344, 1114112}; +static const wchar32 CAT_Cs_HIGH[] = {55296, 56320, 1114112}; +static const wchar32 CAT_Cs_LOW[] = {56320, 57344, 1114112}; +static const wchar32 CAT_L[] = {65, 91, 97, 123, 170, 171, 181, 182, 186, 187, 192, 215, 216, 247, 248, 706, 710, 722, 736, 741, 748, 749, 750, 751, 880, 885, 886, 888, 890, 894, 895, 896, 902, 903, 904, 907, 908, 909, 910, 930, 931, 1014, 1015, 1154, 1162, 1328, 1329, 1367, 1369, 1370, 1377, 1416, 1488, 1515, 1520, 1523, 1568, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1765, 1767, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2036, 2038, 2042, 2043, 2048, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2417, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3655, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3782, 3783, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4256, 4294, 4295, 4296, 4301, 4302, 4304, 4347, 4348, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5024, 5110, 5112, 5118, 5121, 5741, 5743, 5760, 5761, 5787, 5792, 5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6103, 6104, 6108, 6109, 6176, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6823, 6824, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7294, 7296, 7305, 7401, 7405, 7406, 7410, 7413, 7415, 7424, 7616, 7680, 7958, 7960, 7966, 7968, 8006, 8008, 8014, 8016, 8024, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8062, 8064, 8117, 8118, 8125, 8126, 8127, 8130, 8133, 8134, 8141, 8144, 8148, 8150, 8156, 8160, 8173, 8178, 8181, 8182, 8189, 8305, 8306, 8319, 8320, 8336, 8349, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8512, 8517, 8522, 8526, 8527, 8579, 8581, 11264, 11311, 11312, 11359, 11360, 11493, 11499, 11503, 11506, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 11568, 11624, 11631, 11632, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 11823, 11824, 12293, 12295, 12337, 12342, 12347, 12349, 12353, 12439, 12445, 12448, 12449, 12539, 12540, 12544, 12549, 12590, 12593, 12687, 12704, 12731, 12784, 12800, 13312, 19894, 19968, 40918, 40960, 42125, 42192, 42238, 42240, 42509, 42512, 42528, 42538, 42540, 42560, 42607, 42623, 42654, 42656, 42726, 42775, 42784, 42786, 42889, 42891, 42927, 42928, 42936, 42999, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 43335, 43360, 43389, 43396, 43443, 43471, 43472, 43488, 43493, 43494, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43742, 43744, 43755, 43762, 43765, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43824, 43867, 43868, 43878, 43888, 44003, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64256, 64263, 64275, 64280, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65313, 65339, 65345, 65371, 65382, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66560, 66718, 66736, 66772, 66776, 66812, 66816, 66856, 66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 68736, 68787, 68800, 68851, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71840, 71904, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 92992, 92996, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 94099, 94112, 94176, 94177, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 119808, 119893, 119894, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119994, 119995, 119996, 119997, 120004, 120005, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120094, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120146, 120486, 120488, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 120780, 124928, 125125, 125184, 125252, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112}; +static const wchar32 CAT_Ll[] = {97, 123, 181, 182, 223, 247, 248, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 443, 445, 448, 454, 455, 457, 458, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 499, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 660, 661, 688, 881, 882, 883, 884, 887, 888, 891, 894, 912, 913, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1014, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1377, 1416, 5112, 5118, 7296, 7305, 7424, 7468, 7531, 7544, 7545, 7579, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7958, 7968, 7976, 7984, 7992, 8000, 8006, 8016, 8024, 8032, 8040, 8048, 8062, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8117, 8118, 8120, 8126, 8127, 8130, 8133, 8134, 8136, 8144, 8148, 8150, 8152, 8160, 8168, 8178, 8181, 8182, 8184, 8458, 8459, 8462, 8464, 8467, 8468, 8495, 8496, 8500, 8501, 8505, 8506, 8508, 8510, 8518, 8522, 8526, 8527, 8580, 8581, 11312, 11359, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11388, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11493, 11500, 11501, 11502, 11503, 11507, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42606, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42652, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42864, 42865, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42888, 42892, 42893, 42894, 42895, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42933, 42934, 42935, 42936, 43002, 43003, 43824, 43867, 43872, 43878, 43888, 43968, 64256, 64263, 64275, 64280, 65345, 65371, 66600, 66640, 66776, 66812, 68800, 68851, 71872, 71904, 119834, 119860, 119886, 119893, 119894, 119912, 119938, 119964, 119990, 119994, 119995, 119996, 119997, 120004, 120005, 120016, 120042, 120068, 120094, 120120, 120146, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120486, 120514, 120539, 120540, 120546, 120572, 120597, 120598, 120604, 120630, 120655, 120656, 120662, 120688, 120713, 120714, 120720, 120746, 120771, 120772, 120778, 120779, 120780, 125218, 125252, 1114112}; +static const wchar32 CAT_Ll_LOWER[] = {97, 123, 181, 182, 223, 247, 248, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 443, 445, 448, 454, 455, 457, 458, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 499, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 660, 661, 688, 881, 882, 883, 884, 887, 888, 891, 894, 912, 913, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1014, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, 1377, 1416, 5112, 5118, 7296, 7305, 7424, 7468, 7531, 7544, 7545, 7579, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7958, 7968, 7976, 7984, 7992, 8000, 8006, 8016, 8024, 8032, 8040, 8048, 8062, 8064, 8072, 8080, 8088, 8096, 8104, 8112, 8117, 8118, 8120, 8126, 8127, 8130, 8133, 8134, 8136, 8144, 8148, 8150, 8152, 8160, 8168, 8178, 8181, 8182, 8184, 8458, 8459, 8462, 8464, 8467, 8468, 8495, 8496, 8500, 8501, 8505, 8506, 8508, 8510, 8518, 8522, 8526, 8527, 8580, 8581, 11312, 11359, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11388, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11493, 11500, 11501, 11502, 11503, 11507, 11508, 11520, 11558, 11559, 11560, 11565, 11566, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42606, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42652, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42864, 42865, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42888, 42892, 42893, 42894, 42895, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42933, 42934, 42935, 42936, 43002, 43003, 43824, 43867, 43872, 43878, 43888, 43968, 64256, 64263, 64275, 64280, 65345, 65371, 66600, 66640, 66776, 66812, 68800, 68851, 71872, 71904, 119834, 119860, 119886, 119893, 119894, 119912, 119938, 119964, 119990, 119994, 119995, 119996, 119997, 120004, 120005, 120016, 120042, 120068, 120094, 120120, 120146, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120486, 120514, 120539, 120540, 120546, 120572, 120597, 120598, 120604, 120630, 120655, 120656, 120662, 120688, 120713, 120714, 120720, 120746, 120771, 120772, 120778, 120779, 120780, 125218, 125252, 1114112}; +static const wchar32 CAT_Lm[] = {688, 706, 710, 722, 736, 741, 748, 749, 750, 751, 884, 885, 890, 891, 1369, 1370, 1600, 1601, 1765, 1767, 2036, 2038, 2042, 2043, 2074, 2075, 2084, 2085, 2088, 2089, 2417, 2418, 3654, 3655, 3782, 3783, 4348, 4349, 6103, 6104, 6211, 6212, 6823, 6824, 7288, 7294, 7468, 7531, 7544, 7545, 7579, 7616, 8305, 8306, 8319, 8320, 8336, 8349, 11388, 11390, 11631, 11632, 11823, 11824, 12293, 12294, 12337, 12342, 12347, 12348, 12445, 12447, 12540, 12543, 40981, 40982, 42232, 42238, 42508, 42509, 42623, 42624, 42652, 42654, 42775, 42784, 42864, 42865, 42888, 42889, 43000, 43002, 43471, 43472, 43494, 43495, 43632, 43633, 43741, 43742, 43763, 43765, 43868, 43872, 65392, 65393, 65438, 65440, 92992, 92996, 94099, 94112, 94176, 94177, 1114112}; +static const wchar32 CAT_Lm_EXTENDER[] = {711, 712, 720, 722, 884, 885, 890, 891, 1600, 1601, 2036, 2038, 2042, 2043, 2417, 2418, 3654, 3655, 3782, 3783, 6103, 6104, 6823, 6824, 7288, 7294, 11823, 11824, 12293, 12294, 12337, 12342, 12347, 12348, 12445, 12447, 12540, 12543, 40981, 40982, 42508, 42509, 42623, 42624, 43471, 43472, 43741, 43742, 43763, 43765, 65392, 65393, 65438, 65440, 92992, 92996, 94176, 94177, 1114112}; +static const wchar32 CAT_Lm_LETTER[] = {688, 706, 710, 711, 712, 720, 736, 741, 748, 749, 750, 751, 1369, 1370, 1765, 1767, 2074, 2075, 2084, 2085, 2088, 2089, 4348, 4349, 6211, 6212, 7468, 7531, 7544, 7545, 7579, 7616, 8305, 8306, 8319, 8320, 8336, 8349, 11388, 11390, 11631, 11632, 42232, 42238, 42652, 42654, 42775, 42784, 42864, 42865, 42888, 42889, 43000, 43002, 43494, 43495, 43632, 43633, 43868, 43872, 94099, 94112, 1114112}; +static const wchar32 CAT_Lo[] = {170, 171, 186, 187, 443, 444, 448, 452, 660, 661, 1488, 1515, 1520, 1523, 1568, 1600, 1601, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2048, 2070, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2418, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3654, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4304, 4347, 4349, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5121, 5741, 5743, 5760, 5761, 5787, 5792, 5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6108, 6109, 6176, 6211, 6212, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7288, 7401, 7405, 7406, 7410, 7413, 7415, 8501, 8505, 11568, 11624, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 12294, 12295, 12348, 12349, 12353, 12439, 12447, 12448, 12449, 12539, 12543, 12544, 12549, 12590, 12593, 12687, 12704, 12731, 12784, 12800, 13312, 19894, 19968, 40918, 40960, 40981, 40982, 42125, 42192, 42232, 42240, 42508, 42512, 42528, 42538, 42540, 42606, 42607, 42656, 42726, 42895, 42896, 42999, 43000, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 43335, 43360, 43389, 43396, 43443, 43488, 43493, 43495, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43632, 43633, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43741, 43744, 43755, 43762, 43763, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43968, 44003, 44032, 55204, 55216, 55239, 55243, 55292, 63744, 64110, 64112, 64218, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65382, 65392, 65393, 65438, 65440, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66640, 66718, 66816, 66856, 66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 94208, 100333, 100352, 101107, 110592, 110594, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 124928, 125125, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112}; +static const wchar32 CAT_Lo_HIRAGANA[] = {12353, 12439, 12447, 12448, 110593, 110594, 1114112}; +static const wchar32 CAT_Lo_IDEOGRAPH[] = {12294, 12295, 13312, 19894, 19968, 40918, 63744, 64110, 64112, 64218, 94208, 100333, 131072, 173783, 173824, 177973, 177984, 178206, 178208, 183970, 194560, 195102, 1114112}; +static const wchar32 CAT_Lo_KATAKANA[] = {12449, 12539, 12543, 12544, 12784, 12800, 65382, 65392, 65393, 65438, 110592, 110593, 1114112}; +static const wchar32 CAT_Lo_LEADING[] = {4352, 4448, 43360, 43389, 1114112}; +static const wchar32 CAT_Lo_OTHER[] = {170, 171, 186, 187, 443, 444, 448, 452, 660, 661, 1488, 1515, 1520, 1523, 1568, 1600, 1601, 1611, 1646, 1648, 1649, 1748, 1749, 1750, 1774, 1776, 1786, 1789, 1791, 1792, 1808, 1809, 1810, 1840, 1869, 1958, 1969, 1970, 1994, 2027, 2048, 2070, 2112, 2137, 2208, 2229, 2230, 2238, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2418, 2433, 2437, 2445, 2447, 2449, 2451, 2473, 2474, 2481, 2482, 2483, 2486, 2490, 2493, 2494, 2510, 2511, 2524, 2526, 2527, 2530, 2544, 2546, 2565, 2571, 2575, 2577, 2579, 2601, 2602, 2609, 2610, 2612, 2613, 2615, 2616, 2618, 2649, 2653, 2654, 2655, 2674, 2677, 2693, 2702, 2703, 2706, 2707, 2729, 2730, 2737, 2738, 2740, 2741, 2746, 2749, 2750, 2768, 2769, 2784, 2786, 2809, 2810, 2821, 2829, 2831, 2833, 2835, 2857, 2858, 2865, 2866, 2868, 2869, 2874, 2877, 2878, 2908, 2910, 2911, 2914, 2929, 2930, 2947, 2948, 2949, 2955, 2958, 2961, 2962, 2966, 2969, 2971, 2972, 2973, 2974, 2976, 2979, 2981, 2984, 2987, 2990, 3002, 3024, 3025, 3077, 3085, 3086, 3089, 3090, 3113, 3114, 3130, 3133, 3134, 3160, 3163, 3168, 3170, 3200, 3201, 3205, 3213, 3214, 3217, 3218, 3241, 3242, 3252, 3253, 3258, 3261, 3262, 3294, 3295, 3296, 3298, 3313, 3315, 3333, 3341, 3342, 3345, 3346, 3387, 3389, 3390, 3406, 3407, 3412, 3415, 3423, 3426, 3450, 3456, 3461, 3479, 3482, 3506, 3507, 3516, 3517, 3518, 3520, 3527, 3585, 3633, 3634, 3636, 3648, 3654, 3713, 3715, 3716, 3717, 3719, 3721, 3722, 3723, 3725, 3726, 3732, 3736, 3737, 3744, 3745, 3748, 3749, 3750, 3751, 3752, 3754, 3756, 3757, 3761, 3762, 3764, 3773, 3774, 3776, 3781, 3804, 3808, 3840, 3841, 3904, 3912, 3913, 3949, 3976, 3981, 4096, 4139, 4159, 4160, 4176, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4304, 4347, 4349, 4352, 4608, 4681, 4682, 4686, 4688, 4695, 4696, 4697, 4698, 4702, 4704, 4745, 4746, 4750, 4752, 4785, 4786, 4790, 4792, 4799, 4800, 4801, 4802, 4806, 4808, 4823, 4824, 4881, 4882, 4886, 4888, 4955, 4992, 5008, 5121, 5741, 5743, 5760, 5761, 5787, 5792, 5867, 5873, 5881, 5888, 5901, 5902, 5906, 5920, 5938, 5952, 5970, 5984, 5997, 5998, 6001, 6016, 6068, 6108, 6109, 6176, 6211, 6212, 6264, 6272, 6277, 6279, 6313, 6314, 6315, 6320, 6390, 6400, 6431, 6480, 6510, 6512, 6517, 6528, 6572, 6576, 6602, 6656, 6679, 6688, 6741, 6917, 6964, 6981, 6988, 7043, 7073, 7086, 7088, 7098, 7142, 7168, 7204, 7245, 7248, 7258, 7288, 7401, 7405, 7406, 7410, 7413, 7415, 8501, 8505, 11568, 11624, 11648, 11671, 11680, 11687, 11688, 11695, 11696, 11703, 11704, 11711, 11712, 11719, 11720, 11727, 11728, 11735, 11736, 11743, 12348, 12349, 12549, 12590, 12593, 12687, 12704, 12731, 40960, 40981, 40982, 42125, 42192, 42232, 42240, 42508, 42512, 42528, 42538, 42540, 42606, 42607, 42656, 42726, 42895, 42896, 42999, 43000, 43003, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43072, 43124, 43138, 43188, 43250, 43256, 43259, 43260, 43261, 43262, 43274, 43302, 43312, 43335, 43396, 43443, 43488, 43493, 43495, 43504, 43514, 43519, 43520, 43561, 43584, 43587, 43588, 43596, 43616, 43632, 43633, 43639, 43642, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43715, 43739, 43741, 43744, 43755, 43762, 43763, 43777, 43783, 43785, 43791, 43793, 43799, 43808, 43815, 43816, 43823, 43968, 44003, 44032, 55204, 64285, 64286, 64287, 64297, 64298, 64311, 64312, 64317, 64318, 64319, 64320, 64322, 64323, 64325, 64326, 64434, 64467, 64830, 64848, 64912, 64914, 64968, 65008, 65020, 65136, 65141, 65142, 65277, 65440, 65471, 65474, 65480, 65482, 65488, 65490, 65496, 65498, 65501, 65536, 65548, 65549, 65575, 65576, 65595, 65596, 65598, 65599, 65614, 65616, 65630, 65664, 65787, 66176, 66205, 66208, 66257, 66304, 66336, 66352, 66369, 66370, 66378, 66384, 66422, 66432, 66462, 66464, 66500, 66504, 66512, 66640, 66718, 66816, 66856, 66864, 66916, 67072, 67383, 67392, 67414, 67424, 67432, 67584, 67590, 67592, 67593, 67594, 67638, 67639, 67641, 67644, 67645, 67647, 67670, 67680, 67703, 67712, 67743, 67808, 67827, 67828, 67830, 67840, 67862, 67872, 67898, 67968, 68024, 68030, 68032, 68096, 68097, 68112, 68116, 68117, 68120, 68121, 68148, 68192, 68221, 68224, 68253, 68288, 68296, 68297, 68325, 68352, 68406, 68416, 68438, 68448, 68467, 68480, 68498, 68608, 68681, 69635, 69688, 69763, 69808, 69840, 69865, 69891, 69927, 69968, 70003, 70006, 70007, 70019, 70067, 70081, 70085, 70106, 70107, 70108, 70109, 70144, 70162, 70163, 70188, 70272, 70279, 70280, 70281, 70282, 70286, 70287, 70302, 70303, 70313, 70320, 70367, 70405, 70413, 70415, 70417, 70419, 70441, 70442, 70449, 70450, 70452, 70453, 70458, 70461, 70462, 70480, 70481, 70493, 70498, 70656, 70709, 70727, 70731, 70784, 70832, 70852, 70854, 70855, 70856, 71040, 71087, 71128, 71132, 71168, 71216, 71236, 71237, 71296, 71339, 71424, 71450, 71935, 71936, 72384, 72441, 72704, 72713, 72714, 72751, 72768, 72769, 72818, 72848, 73728, 74650, 74880, 75076, 77824, 78895, 82944, 83527, 92160, 92729, 92736, 92767, 92880, 92910, 92928, 92976, 93027, 93048, 93053, 93072, 93952, 94021, 94032, 94033, 100352, 101107, 113664, 113771, 113776, 113789, 113792, 113801, 113808, 113818, 124928, 125125, 126464, 126468, 126469, 126496, 126497, 126499, 126500, 126501, 126503, 126504, 126505, 126515, 126516, 126520, 126521, 126522, 126523, 126524, 126530, 126531, 126535, 126536, 126537, 126538, 126539, 126540, 126541, 126544, 126545, 126547, 126548, 126549, 126551, 126552, 126553, 126554, 126555, 126556, 126557, 126558, 126559, 126560, 126561, 126563, 126564, 126565, 126567, 126571, 126572, 126579, 126580, 126584, 126585, 126589, 126590, 126591, 126592, 126602, 126603, 126620, 126625, 126628, 126629, 126634, 126635, 126652, 1114112}; +static const wchar32 CAT_Lo_TRAILING[] = {4520, 4608, 55243, 55292, 1114112}; +static const wchar32 CAT_Lo_VOWEL[] = {4448, 4520, 55216, 55239, 1114112}; +static const wchar32 CAT_Lt[] = {453, 454, 456, 457, 459, 460, 498, 499, 8072, 8080, 8088, 8096, 8104, 8112, 8124, 8125, 8140, 8141, 8188, 8189, 1114112}; +static const wchar32 CAT_Lt_TITLE[] = {453, 454, 456, 457, 459, 460, 498, 499, 8072, 8080, 8088, 8096, 8104, 8112, 8124, 8125, 8140, 8141, 8188, 8189, 1114112}; +static const wchar32 CAT_Lu[] = {65, 91, 192, 215, 216, 223, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 444, 445, 452, 453, 455, 456, 458, 459, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 498, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 880, 881, 882, 883, 886, 887, 895, 896, 902, 903, 904, 907, 908, 909, 910, 912, 913, 930, 931, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1015, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1329, 1367, 4256, 4294, 4295, 4296, 4301, 4302, 5024, 5110, 7680, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7960, 7966, 7976, 7984, 7992, 8000, 8008, 8014, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8032, 8040, 8048, 8120, 8124, 8136, 8140, 8152, 8156, 8168, 8173, 8184, 8188, 8450, 8451, 8455, 8456, 8459, 8462, 8464, 8467, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8496, 8500, 8510, 8512, 8517, 8518, 8579, 8580, 11264, 11311, 11360, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11390, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11499, 11500, 11501, 11502, 11506, 11507, 42560, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42624, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42786, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42891, 42892, 42893, 42894, 42896, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42927, 42928, 42933, 42934, 42935, 65313, 65339, 66560, 66600, 66736, 66772, 68736, 68787, 71840, 71872, 119808, 119834, 119860, 119886, 119912, 119938, 119964, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119990, 120016, 120042, 120068, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120120, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120488, 120513, 120546, 120571, 120604, 120629, 120662, 120687, 120720, 120745, 120778, 120779, 125184, 125218, 1114112}; +static const wchar32 CAT_Lu_UPPER[] = {65, 91, 192, 215, 216, 223, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 378, 379, 380, 381, 382, 385, 387, 388, 389, 390, 392, 393, 396, 398, 402, 403, 405, 406, 409, 412, 414, 415, 417, 418, 419, 420, 421, 422, 424, 425, 426, 428, 429, 430, 432, 433, 436, 437, 438, 439, 441, 444, 445, 452, 453, 455, 456, 458, 459, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 497, 498, 500, 501, 502, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 570, 572, 573, 575, 577, 578, 579, 583, 584, 585, 586, 587, 588, 589, 590, 591, 880, 881, 882, 883, 886, 887, 895, 896, 902, 903, 904, 907, 908, 909, 910, 912, 913, 930, 931, 940, 975, 976, 978, 981, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1012, 1013, 1015, 1016, 1017, 1019, 1021, 1072, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300, 1301, 1302, 1303, 1304, 1305, 1306, 1307, 1308, 1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, 1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1329, 1367, 4256, 4294, 4295, 4296, 4301, 4302, 5024, 5110, 7680, 7681, 7682, 7683, 7684, 7685, 7686, 7687, 7688, 7689, 7690, 7691, 7692, 7693, 7694, 7695, 7696, 7697, 7698, 7699, 7700, 7701, 7702, 7703, 7704, 7705, 7706, 7707, 7708, 7709, 7710, 7711, 7712, 7713, 7714, 7715, 7716, 7717, 7718, 7719, 7720, 7721, 7722, 7723, 7724, 7725, 7726, 7727, 7728, 7729, 7730, 7731, 7732, 7733, 7734, 7735, 7736, 7737, 7738, 7739, 7740, 7741, 7742, 7743, 7744, 7745, 7746, 7747, 7748, 7749, 7750, 7751, 7752, 7753, 7754, 7755, 7756, 7757, 7758, 7759, 7760, 7761, 7762, 7763, 7764, 7765, 7766, 7767, 7768, 7769, 7770, 7771, 7772, 7773, 7774, 7775, 7776, 7777, 7778, 7779, 7780, 7781, 7782, 7783, 7784, 7785, 7786, 7787, 7788, 7789, 7790, 7791, 7792, 7793, 7794, 7795, 7796, 7797, 7798, 7799, 7800, 7801, 7802, 7803, 7804, 7805, 7806, 7807, 7808, 7809, 7810, 7811, 7812, 7813, 7814, 7815, 7816, 7817, 7818, 7819, 7820, 7821, 7822, 7823, 7824, 7825, 7826, 7827, 7828, 7829, 7838, 7839, 7840, 7841, 7842, 7843, 7844, 7845, 7846, 7847, 7848, 7849, 7850, 7851, 7852, 7853, 7854, 7855, 7856, 7857, 7858, 7859, 7860, 7861, 7862, 7863, 7864, 7865, 7866, 7867, 7868, 7869, 7870, 7871, 7872, 7873, 7874, 7875, 7876, 7877, 7878, 7879, 7880, 7881, 7882, 7883, 7884, 7885, 7886, 7887, 7888, 7889, 7890, 7891, 7892, 7893, 7894, 7895, 7896, 7897, 7898, 7899, 7900, 7901, 7902, 7903, 7904, 7905, 7906, 7907, 7908, 7909, 7910, 7911, 7912, 7913, 7914, 7915, 7916, 7917, 7918, 7919, 7920, 7921, 7922, 7923, 7924, 7925, 7926, 7927, 7928, 7929, 7930, 7931, 7932, 7933, 7934, 7935, 7944, 7952, 7960, 7966, 7976, 7984, 7992, 8000, 8008, 8014, 8025, 8026, 8027, 8028, 8029, 8030, 8031, 8032, 8040, 8048, 8120, 8124, 8136, 8140, 8152, 8156, 8168, 8173, 8184, 8188, 8450, 8451, 8455, 8456, 8459, 8462, 8464, 8467, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8496, 8500, 8510, 8512, 8517, 8518, 8579, 8580, 11264, 11311, 11360, 11361, 11362, 11365, 11367, 11368, 11369, 11370, 11371, 11372, 11373, 11377, 11378, 11379, 11381, 11382, 11390, 11393, 11394, 11395, 11396, 11397, 11398, 11399, 11400, 11401, 11402, 11403, 11404, 11405, 11406, 11407, 11408, 11409, 11410, 11411, 11412, 11413, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11422, 11423, 11424, 11425, 11426, 11427, 11428, 11429, 11430, 11431, 11432, 11433, 11434, 11435, 11436, 11437, 11438, 11439, 11440, 11441, 11442, 11443, 11444, 11445, 11446, 11447, 11448, 11449, 11450, 11451, 11452, 11453, 11454, 11455, 11456, 11457, 11458, 11459, 11460, 11461, 11462, 11463, 11464, 11465, 11466, 11467, 11468, 11469, 11470, 11471, 11472, 11473, 11474, 11475, 11476, 11477, 11478, 11479, 11480, 11481, 11482, 11483, 11484, 11485, 11486, 11487, 11488, 11489, 11490, 11491, 11499, 11500, 11501, 11502, 11506, 11507, 42560, 42561, 42562, 42563, 42564, 42565, 42566, 42567, 42568, 42569, 42570, 42571, 42572, 42573, 42574, 42575, 42576, 42577, 42578, 42579, 42580, 42581, 42582, 42583, 42584, 42585, 42586, 42587, 42588, 42589, 42590, 42591, 42592, 42593, 42594, 42595, 42596, 42597, 42598, 42599, 42600, 42601, 42602, 42603, 42604, 42605, 42624, 42625, 42626, 42627, 42628, 42629, 42630, 42631, 42632, 42633, 42634, 42635, 42636, 42637, 42638, 42639, 42640, 42641, 42642, 42643, 42644, 42645, 42646, 42647, 42648, 42649, 42650, 42651, 42786, 42787, 42788, 42789, 42790, 42791, 42792, 42793, 42794, 42795, 42796, 42797, 42798, 42799, 42802, 42803, 42804, 42805, 42806, 42807, 42808, 42809, 42810, 42811, 42812, 42813, 42814, 42815, 42816, 42817, 42818, 42819, 42820, 42821, 42822, 42823, 42824, 42825, 42826, 42827, 42828, 42829, 42830, 42831, 42832, 42833, 42834, 42835, 42836, 42837, 42838, 42839, 42840, 42841, 42842, 42843, 42844, 42845, 42846, 42847, 42848, 42849, 42850, 42851, 42852, 42853, 42854, 42855, 42856, 42857, 42858, 42859, 42860, 42861, 42862, 42863, 42873, 42874, 42875, 42876, 42877, 42879, 42880, 42881, 42882, 42883, 42884, 42885, 42886, 42887, 42891, 42892, 42893, 42894, 42896, 42897, 42898, 42899, 42902, 42903, 42904, 42905, 42906, 42907, 42908, 42909, 42910, 42911, 42912, 42913, 42914, 42915, 42916, 42917, 42918, 42919, 42920, 42921, 42922, 42927, 42928, 42933, 42934, 42935, 65313, 65339, 66560, 66600, 66736, 66772, 68736, 68787, 71840, 71872, 119808, 119834, 119860, 119886, 119912, 119938, 119964, 119965, 119966, 119968, 119970, 119971, 119973, 119975, 119977, 119981, 119982, 119990, 120016, 120042, 120068, 120070, 120071, 120075, 120077, 120085, 120086, 120093, 120120, 120122, 120123, 120127, 120128, 120133, 120134, 120135, 120138, 120145, 120172, 120198, 120224, 120250, 120276, 120302, 120328, 120354, 120380, 120406, 120432, 120458, 120488, 120513, 120546, 120571, 120604, 120629, 120662, 120687, 120720, 120745, 120778, 120779, 125184, 125218, 1114112}; +static const wchar32 CAT_M[] = {768, 880, 1155, 1162, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2308, 2362, 2365, 2366, 2384, 2385, 2392, 2402, 2404, 2433, 2436, 2492, 2493, 2494, 2501, 2503, 2505, 2507, 2510, 2519, 2520, 2530, 2532, 2561, 2564, 2620, 2621, 2622, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2692, 2748, 2749, 2750, 2758, 2759, 2762, 2763, 2766, 2786, 2788, 2817, 2820, 2876, 2877, 2878, 2885, 2887, 2889, 2891, 2894, 2902, 2904, 2914, 2916, 2946, 2947, 3006, 3011, 3014, 3017, 3018, 3022, 3031, 3032, 3072, 3076, 3134, 3141, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3204, 3260, 3261, 3262, 3269, 3270, 3273, 3274, 3278, 3285, 3287, 3298, 3300, 3329, 3332, 3390, 3397, 3398, 3401, 3402, 3406, 3415, 3416, 3426, 3428, 3458, 3460, 3530, 3531, 3535, 3541, 3542, 3543, 3544, 3552, 3570, 3572, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3902, 3904, 3953, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4139, 4159, 4182, 4186, 4190, 4193, 4194, 4197, 4199, 4206, 4209, 4213, 4226, 4238, 4239, 4240, 4250, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6444, 6448, 6460, 6679, 6684, 6741, 6751, 6752, 6781, 6783, 6784, 6832, 6847, 6912, 6917, 6964, 6981, 7019, 7028, 7040, 7043, 7073, 7086, 7142, 7156, 7204, 7224, 7376, 7379, 7380, 7401, 7405, 7406, 7410, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8433, 11503, 11506, 11647, 11648, 11744, 11776, 12330, 12336, 12441, 12443, 42607, 42611, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43043, 43048, 43136, 43138, 43188, 43206, 43232, 43250, 43302, 43310, 43335, 43348, 43392, 43396, 43443, 43457, 43493, 43494, 43561, 43575, 43587, 43588, 43596, 43598, 43643, 43646, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43755, 43760, 43765, 43767, 44003, 44011, 44012, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69632, 69635, 69688, 69703, 69759, 69763, 69808, 69819, 69888, 69891, 69927, 69941, 70003, 70004, 70016, 70019, 70067, 70081, 70090, 70093, 70188, 70200, 70206, 70207, 70367, 70379, 70400, 70404, 70460, 70461, 70462, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70502, 70509, 70512, 70517, 70709, 70727, 70832, 70852, 71087, 71094, 71096, 71105, 71132, 71134, 71216, 71233, 71339, 71352, 71453, 71468, 72751, 72759, 72760, 72768, 72850, 72872, 72873, 72887, 92912, 92917, 92976, 92983, 94033, 94079, 94095, 94099, 113821, 113823, 119141, 119146, 119149, 119155, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112}; +static const wchar32 CAT_Mc[] = {2307, 2308, 2363, 2364, 2366, 2369, 2377, 2381, 2382, 2384, 2434, 2436, 2494, 2497, 2503, 2505, 2507, 2509, 2519, 2520, 2563, 2564, 2622, 2625, 2691, 2692, 2750, 2753, 2761, 2762, 2763, 2765, 2818, 2820, 2878, 2879, 2880, 2881, 2887, 2889, 2891, 2893, 2903, 2904, 3006, 3008, 3009, 3011, 3014, 3017, 3018, 3021, 3031, 3032, 3073, 3076, 3137, 3141, 3202, 3204, 3262, 3263, 3264, 3269, 3271, 3273, 3274, 3276, 3285, 3287, 3330, 3332, 3390, 3393, 3398, 3401, 3402, 3405, 3415, 3416, 3458, 3460, 3535, 3538, 3544, 3552, 3570, 3572, 3902, 3904, 3967, 3968, 4139, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4182, 4184, 4194, 4197, 4199, 4206, 4227, 4229, 4231, 4237, 4239, 4240, 4250, 4253, 6070, 6071, 6078, 6086, 6087, 6089, 6435, 6439, 6441, 6444, 6448, 6450, 6451, 6457, 6681, 6683, 6741, 6742, 6743, 6744, 6753, 6754, 6755, 6757, 6765, 6771, 6916, 6917, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 6981, 7042, 7043, 7073, 7074, 7078, 7080, 7082, 7083, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7156, 7204, 7212, 7220, 7222, 7393, 7394, 7410, 7412, 12334, 12336, 43043, 43045, 43047, 43048, 43136, 43138, 43188, 43204, 43346, 43348, 43395, 43396, 43444, 43446, 43450, 43452, 43453, 43457, 43567, 43569, 43571, 43573, 43597, 43598, 43643, 43644, 43645, 43646, 43755, 43756, 43758, 43760, 43765, 43766, 44003, 44005, 44006, 44008, 44009, 44011, 44012, 44013, 69632, 69633, 69634, 69635, 69762, 69763, 69808, 69811, 69815, 69817, 69932, 69933, 70018, 70019, 70067, 70070, 70079, 70081, 70188, 70191, 70194, 70196, 70197, 70198, 70368, 70371, 70402, 70404, 70462, 70464, 70465, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70709, 70712, 70720, 70722, 70725, 70726, 70832, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 71087, 71090, 71096, 71100, 71102, 71103, 71216, 71219, 71227, 71229, 71230, 71231, 71340, 71341, 71342, 71344, 71350, 71351, 71456, 71458, 71462, 71463, 72751, 72752, 72766, 72767, 72873, 72874, 72881, 72882, 72884, 72885, 94033, 94079, 119141, 119143, 119149, 119155, 1114112}; +static const wchar32 CAT_Mc_SPACING[] = {2307, 2308, 2363, 2364, 2366, 2369, 2377, 2381, 2382, 2384, 2434, 2436, 2494, 2497, 2503, 2505, 2507, 2509, 2519, 2520, 2563, 2564, 2622, 2625, 2691, 2692, 2750, 2753, 2761, 2762, 2763, 2765, 2818, 2820, 2878, 2879, 2880, 2881, 2887, 2889, 2891, 2893, 2903, 2904, 3006, 3008, 3009, 3011, 3014, 3017, 3018, 3021, 3031, 3032, 3073, 3076, 3137, 3141, 3202, 3204, 3262, 3263, 3264, 3269, 3271, 3273, 3274, 3276, 3285, 3287, 3330, 3332, 3390, 3393, 3398, 3401, 3402, 3405, 3415, 3416, 3458, 3460, 3535, 3538, 3544, 3552, 3570, 3572, 3902, 3904, 3967, 3968, 4139, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4182, 4184, 4194, 4197, 4199, 4206, 4227, 4229, 4231, 4237, 4239, 4240, 4250, 4253, 6070, 6071, 6078, 6086, 6087, 6089, 6435, 6439, 6441, 6444, 6448, 6450, 6451, 6457, 6681, 6683, 6741, 6742, 6743, 6744, 6753, 6754, 6755, 6757, 6765, 6771, 6916, 6917, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 6981, 7042, 7043, 7073, 7074, 7078, 7080, 7082, 7083, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7156, 7204, 7212, 7220, 7222, 7393, 7394, 7410, 7412, 12334, 12336, 43043, 43045, 43047, 43048, 43136, 43138, 43188, 43204, 43346, 43348, 43395, 43396, 43444, 43446, 43450, 43452, 43453, 43457, 43567, 43569, 43571, 43573, 43597, 43598, 43643, 43644, 43645, 43646, 43755, 43756, 43758, 43760, 43765, 43766, 44003, 44005, 44006, 44008, 44009, 44011, 44012, 44013, 69632, 69633, 69634, 69635, 69762, 69763, 69808, 69811, 69815, 69817, 69932, 69933, 70018, 70019, 70067, 70070, 70079, 70081, 70188, 70191, 70194, 70196, 70197, 70198, 70368, 70371, 70402, 70404, 70462, 70464, 70465, 70469, 70471, 70473, 70475, 70478, 70487, 70488, 70498, 70500, 70709, 70712, 70720, 70722, 70725, 70726, 70832, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 71087, 71090, 71096, 71100, 71102, 71103, 71216, 71219, 71227, 71229, 71230, 71231, 71340, 71341, 71342, 71344, 71350, 71351, 71456, 71458, 71462, 71463, 72751, 72752, 72766, 72767, 72873, 72874, 72881, 72882, 72884, 72885, 94033, 94079, 119141, 119143, 119149, 119155, 1114112}; +static const wchar32 CAT_Me[] = {1160, 1162, 6846, 6847, 8413, 8417, 8418, 8421, 42608, 42611, 1114112}; +static const wchar32 CAT_Me_ENCLOSING[] = {1160, 1162, 6846, 6847, 8413, 8417, 8418, 8421, 42608, 42611, 1114112}; +static const wchar32 CAT_Mn[] = {768, 880, 1155, 1160, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2307, 2362, 2363, 2364, 2365, 2369, 2377, 2381, 2382, 2385, 2392, 2402, 2404, 2433, 2434, 2492, 2493, 2497, 2501, 2509, 2510, 2530, 2532, 2561, 2563, 2620, 2621, 2625, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2691, 2748, 2749, 2753, 2758, 2759, 2761, 2765, 2766, 2786, 2788, 2817, 2818, 2876, 2877, 2879, 2880, 2881, 2885, 2893, 2894, 2902, 2903, 2914, 2916, 2946, 2947, 3008, 3009, 3021, 3022, 3072, 3073, 3134, 3137, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3202, 3260, 3261, 3263, 3264, 3270, 3271, 3276, 3278, 3298, 3300, 3329, 3330, 3393, 3397, 3405, 3406, 3426, 3428, 3530, 3531, 3538, 3541, 3542, 3543, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3953, 3967, 3968, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4159, 4184, 4186, 4190, 4193, 4209, 4213, 4226, 4227, 4229, 4231, 4237, 4238, 4253, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6435, 6439, 6441, 6450, 6451, 6457, 6460, 6679, 6681, 6683, 6684, 6742, 6743, 6744, 6751, 6752, 6753, 6754, 6755, 6757, 6765, 6771, 6781, 6783, 6784, 6832, 6846, 6912, 6916, 6964, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 7019, 7028, 7040, 7042, 7074, 7078, 7080, 7082, 7083, 7086, 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7212, 7220, 7222, 7224, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, 7412, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8413, 8417, 8418, 8421, 8433, 11503, 11506, 11647, 11648, 11744, 11776, 12330, 12334, 12441, 12443, 42607, 42608, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43045, 43047, 43204, 43206, 43232, 43250, 43302, 43310, 43335, 43346, 43392, 43395, 43443, 43444, 43446, 43450, 43452, 43453, 43493, 43494, 43561, 43567, 43569, 43571, 43573, 43575, 43587, 43588, 43596, 43597, 43644, 43645, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43756, 43758, 43766, 43767, 44005, 44006, 44008, 44009, 44013, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69633, 69634, 69688, 69703, 69759, 69762, 69811, 69815, 69817, 69819, 69888, 69891, 69927, 69932, 69933, 69941, 70003, 70004, 70016, 70018, 70070, 70079, 70090, 70093, 70191, 70194, 70196, 70197, 70198, 70200, 70206, 70207, 70367, 70368, 70371, 70379, 70400, 70402, 70460, 70461, 70464, 70465, 70502, 70509, 70512, 70517, 70712, 70720, 70722, 70725, 70726, 70727, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 70852, 71090, 71094, 71100, 71102, 71103, 71105, 71132, 71134, 71219, 71227, 71229, 71230, 71231, 71233, 71339, 71340, 71341, 71342, 71344, 71350, 71351, 71352, 71453, 71456, 71458, 71462, 71463, 71468, 72752, 72759, 72760, 72766, 72767, 72768, 72850, 72872, 72874, 72881, 72882, 72884, 72885, 72887, 92912, 92917, 92976, 92983, 94095, 94099, 113821, 113823, 119143, 119146, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112}; +static const wchar32 CAT_Mn_NONSPACING[] = {768, 880, 1155, 1160, 1425, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1480, 1552, 1563, 1611, 1632, 1648, 1649, 1750, 1757, 1759, 1765, 1767, 1769, 1770, 1774, 1809, 1810, 1840, 1867, 1958, 1969, 2027, 2036, 2070, 2074, 2075, 2084, 2085, 2088, 2089, 2094, 2137, 2140, 2260, 2274, 2275, 2307, 2362, 2363, 2364, 2365, 2369, 2377, 2381, 2382, 2385, 2392, 2402, 2404, 2433, 2434, 2492, 2493, 2497, 2501, 2509, 2510, 2530, 2532, 2561, 2563, 2620, 2621, 2625, 2627, 2631, 2633, 2635, 2638, 2641, 2642, 2672, 2674, 2677, 2678, 2689, 2691, 2748, 2749, 2753, 2758, 2759, 2761, 2765, 2766, 2786, 2788, 2817, 2818, 2876, 2877, 2879, 2880, 2881, 2885, 2893, 2894, 2902, 2903, 2914, 2916, 2946, 2947, 3008, 3009, 3021, 3022, 3072, 3073, 3134, 3137, 3142, 3145, 3146, 3150, 3157, 3159, 3170, 3172, 3201, 3202, 3260, 3261, 3263, 3264, 3270, 3271, 3276, 3278, 3298, 3300, 3329, 3330, 3393, 3397, 3405, 3406, 3426, 3428, 3530, 3531, 3538, 3541, 3542, 3543, 3633, 3634, 3636, 3643, 3655, 3663, 3761, 3762, 3764, 3770, 3771, 3773, 3784, 3790, 3864, 3866, 3893, 3894, 3895, 3896, 3897, 3898, 3953, 3967, 3968, 3973, 3974, 3976, 3981, 3992, 3993, 4029, 4038, 4039, 4141, 4145, 4146, 4152, 4153, 4155, 4157, 4159, 4184, 4186, 4190, 4193, 4209, 4213, 4226, 4227, 4229, 4231, 4237, 4238, 4253, 4254, 4957, 4960, 5906, 5909, 5938, 5941, 5970, 5972, 6002, 6004, 6068, 6070, 6071, 6078, 6086, 6087, 6089, 6100, 6109, 6110, 6155, 6158, 6277, 6279, 6313, 6314, 6432, 6435, 6439, 6441, 6450, 6451, 6457, 6460, 6679, 6681, 6683, 6684, 6742, 6743, 6744, 6751, 6752, 6753, 6754, 6755, 6757, 6765, 6771, 6781, 6783, 6784, 6832, 6846, 6912, 6916, 6964, 6965, 6966, 6971, 6972, 6973, 6978, 6979, 7019, 7028, 7040, 7042, 7074, 7078, 7080, 7082, 7083, 7086, 7142, 7143, 7144, 7146, 7149, 7150, 7151, 7154, 7212, 7220, 7222, 7224, 7376, 7379, 7380, 7393, 7394, 7401, 7405, 7406, 7412, 7413, 7416, 7418, 7616, 7670, 7675, 7680, 8400, 8413, 8417, 8418, 8421, 8433, 11503, 11506, 11647, 11648, 11744, 11776, 12330, 12334, 12441, 12443, 42607, 42608, 42612, 42622, 42654, 42656, 42736, 42738, 43010, 43011, 43014, 43015, 43019, 43020, 43045, 43047, 43204, 43206, 43232, 43250, 43302, 43310, 43335, 43346, 43392, 43395, 43443, 43444, 43446, 43450, 43452, 43453, 43493, 43494, 43561, 43567, 43569, 43571, 43573, 43575, 43587, 43588, 43596, 43597, 43644, 43645, 43696, 43697, 43698, 43701, 43703, 43705, 43710, 43712, 43713, 43714, 43756, 43758, 43766, 43767, 44005, 44006, 44008, 44009, 44013, 44014, 64286, 64287, 65024, 65040, 65056, 65072, 66045, 66046, 66272, 66273, 66422, 66427, 68097, 68100, 68101, 68103, 68108, 68112, 68152, 68155, 68159, 68160, 68325, 68327, 69633, 69634, 69688, 69703, 69759, 69762, 69811, 69815, 69817, 69819, 69888, 69891, 69927, 69932, 69933, 69941, 70003, 70004, 70016, 70018, 70070, 70079, 70090, 70093, 70191, 70194, 70196, 70197, 70198, 70200, 70206, 70207, 70367, 70368, 70371, 70379, 70400, 70402, 70460, 70461, 70464, 70465, 70502, 70509, 70512, 70517, 70712, 70720, 70722, 70725, 70726, 70727, 70835, 70841, 70842, 70843, 70847, 70849, 70850, 70852, 71090, 71094, 71100, 71102, 71103, 71105, 71132, 71134, 71219, 71227, 71229, 71230, 71231, 71233, 71339, 71340, 71341, 71342, 71344, 71350, 71351, 71352, 71453, 71456, 71458, 71462, 71463, 71468, 72752, 72759, 72760, 72766, 72767, 72768, 72850, 72872, 72874, 72881, 72882, 72884, 72885, 72887, 92912, 92917, 92976, 92983, 94095, 94099, 113821, 113823, 119143, 119146, 119163, 119171, 119173, 119180, 119210, 119214, 119362, 119365, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121499, 121504, 121505, 121520, 122880, 122887, 122888, 122905, 122907, 122914, 122915, 122917, 122918, 122923, 125136, 125143, 125252, 125259, 917760, 918000, 1114112}; +static const wchar32 CAT_N[] = {48, 58, 178, 180, 185, 186, 188, 191, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2548, 2554, 2662, 2672, 2790, 2800, 2918, 2928, 2930, 2936, 3046, 3059, 3174, 3184, 3192, 3199, 3302, 3312, 3416, 3423, 3430, 3449, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3892, 4160, 4170, 4240, 4250, 4969, 4989, 5870, 5873, 6112, 6122, 6128, 6138, 6160, 6170, 6470, 6480, 6608, 6619, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8579, 8581, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12295, 12296, 12321, 12330, 12344, 12347, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 42528, 42538, 42726, 42736, 43056, 43062, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 65799, 65844, 65856, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 66369, 66370, 66378, 66379, 66513, 66518, 66720, 66730, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70113, 70133, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71484, 71904, 71923, 72784, 72813, 74752, 74863, 92768, 92778, 93008, 93018, 93019, 93026, 119648, 119666, 120782, 120832, 125127, 125136, 125264, 125274, 127232, 127245, 1114112}; +static const wchar32 CAT_Nd[] = {48, 58, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2662, 2672, 2790, 2800, 2918, 2928, 3046, 3056, 3174, 3184, 3302, 3312, 3430, 3440, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3882, 4160, 4170, 4240, 4250, 6112, 6122, 6160, 6170, 6470, 6480, 6608, 6618, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 42528, 42538, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 66720, 66730, 69734, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71482, 71904, 71914, 72784, 72794, 92768, 92778, 93008, 93018, 120782, 120832, 125264, 125274, 1114112}; +static const wchar32 CAT_Nd_DIGIT[] = {48, 58, 1632, 1642, 1776, 1786, 1984, 1994, 2406, 2416, 2534, 2544, 2662, 2672, 2790, 2800, 2918, 2928, 3046, 3056, 3174, 3184, 3302, 3312, 3430, 3440, 3558, 3568, 3664, 3674, 3792, 3802, 3872, 3882, 4160, 4170, 4240, 4250, 6112, 6122, 6160, 6170, 6470, 6480, 6608, 6618, 6784, 6794, 6800, 6810, 6992, 7002, 7088, 7098, 7232, 7242, 7248, 7258, 42528, 42538, 43216, 43226, 43264, 43274, 43472, 43482, 43504, 43514, 43600, 43610, 44016, 44026, 65296, 65306, 66720, 66730, 69734, 69744, 69872, 69882, 69942, 69952, 70096, 70106, 70384, 70394, 70736, 70746, 70864, 70874, 71248, 71258, 71360, 71370, 71472, 71482, 71904, 71914, 72784, 72794, 92768, 92778, 93008, 93018, 120782, 120832, 125264, 125274, 1114112}; +static const wchar32 CAT_Nl[] = {5870, 5873, 8544, 8579, 8581, 8585, 12295, 12296, 12321, 12330, 12344, 12347, 42726, 42736, 65856, 65909, 66369, 66370, 66378, 66379, 66513, 66518, 74752, 74863, 1114112}; +static const wchar32 CAT_Nl_IDEOGRAPH[] = {12295, 12296, 12321, 12330, 12344, 12347, 1114112}; +static const wchar32 CAT_Nl_LETTER[] = {5870, 5873, 8544, 8579, 8581, 8585, 42726, 42736, 65856, 65909, 66369, 66370, 66378, 66379, 66513, 66518, 74752, 74863, 1114112}; +static const wchar32 CAT_No[] = {178, 180, 185, 186, 188, 191, 2548, 2554, 2930, 2936, 3056, 3059, 3192, 3199, 3416, 3423, 3440, 3449, 3882, 3892, 4969, 4989, 6128, 6138, 6618, 6619, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8544, 8585, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 43056, 43062, 65799, 65844, 65909, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69734, 70113, 70133, 71482, 71484, 71914, 71923, 72794, 72813, 93019, 93026, 119648, 119666, 125127, 125136, 127232, 127245, 1114112}; +static const wchar32 CAT_No_OTHER[] = {178, 180, 185, 186, 188, 191, 2548, 2554, 2930, 2936, 3056, 3059, 3192, 3199, 3416, 3423, 3440, 3449, 3882, 3892, 4969, 4989, 6128, 6138, 6618, 6619, 8304, 8305, 8308, 8314, 8320, 8330, 8528, 8544, 8585, 8586, 9312, 9372, 9450, 9472, 10102, 10132, 11517, 11518, 12690, 12694, 12832, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 43056, 43062, 65799, 65844, 65909, 65913, 65930, 65932, 66273, 66300, 66336, 66340, 67672, 67680, 67705, 67712, 67751, 67760, 67835, 67840, 67862, 67868, 68028, 68030, 68032, 68048, 68050, 68096, 68160, 68168, 68221, 68223, 68253, 68256, 68331, 68336, 68440, 68448, 68472, 68480, 68521, 68528, 68858, 68864, 69216, 69247, 69714, 69734, 70113, 70133, 71482, 71484, 71914, 71923, 72794, 72813, 93019, 93026, 119648, 119666, 125127, 125136, 127232, 127245, 1114112}; +static const wchar32 CAT_P[] = {33, 36, 37, 43, 44, 48, 58, 60, 63, 65, 91, 94, 95, 96, 123, 124, 125, 126, 161, 162, 167, 168, 171, 172, 182, 184, 187, 188, 191, 192, 894, 895, 903, 904, 1370, 1376, 1417, 1419, 1470, 1471, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1548, 1550, 1563, 1564, 1566, 1568, 1642, 1646, 1748, 1749, 1792, 1806, 2039, 2042, 2096, 2111, 2142, 2143, 2404, 2406, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3898, 3902, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4969, 5120, 5121, 5741, 5743, 5787, 5789, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6155, 6468, 6470, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8208, 8232, 8240, 8260, 8261, 8274, 8275, 8287, 8317, 8319, 8333, 8335, 8968, 8972, 9001, 9003, 10088, 10102, 10181, 10183, 10214, 10224, 10627, 10649, 10712, 10716, 10748, 10750, 11513, 11517, 11518, 11520, 11632, 11633, 11776, 11823, 11824, 11845, 12289, 12292, 12296, 12306, 12308, 12320, 12336, 12337, 12349, 12350, 12448, 12449, 12539, 12540, 42238, 42240, 42509, 42512, 42611, 42612, 42622, 42623, 42738, 42744, 43124, 43128, 43214, 43216, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43616, 43742, 43744, 43760, 43762, 44011, 44012, 64830, 64832, 65040, 65050, 65072, 65107, 65108, 65122, 65123, 65124, 65128, 65129, 65130, 65132, 65281, 65284, 65285, 65291, 65292, 65296, 65306, 65308, 65311, 65313, 65339, 65342, 65343, 65344, 65371, 65372, 65373, 65374, 65375, 65382, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69703, 69710, 69819, 69821, 69822, 69826, 69952, 69956, 70004, 70006, 70085, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70200, 70206, 70313, 70314, 70731, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 71128, 71233, 71236, 71264, 71277, 71484, 71487, 72769, 72774, 72816, 72818, 74864, 74869, 92782, 92784, 92917, 92918, 92983, 92988, 92996, 92997, 113823, 113824, 121479, 121484, 125278, 125280, 1114112}; +static const wchar32 CAT_Pc[] = {95, 96, 8255, 8257, 8276, 8277, 65075, 65077, 65101, 65104, 65343, 65344, 1114112}; +static const wchar32 CAT_Pc_CONNECTOR[] = {95, 96, 8255, 8257, 8276, 8277, 65075, 65077, 65101, 65104, 65343, 65344, 1114112}; +static const wchar32 CAT_Pd[] = {45, 46, 1418, 1419, 1470, 1471, 5120, 5121, 6150, 6151, 8208, 8214, 11799, 11800, 11802, 11803, 11834, 11836, 11840, 11841, 12316, 12317, 12336, 12337, 12448, 12449, 65073, 65075, 65112, 65113, 65123, 65124, 65293, 65294, 1114112}; +static const wchar32 CAT_Pd_DASH[] = {1470, 1471, 8210, 8214, 11834, 11836, 12316, 12317, 12336, 12337, 65073, 65075, 65112, 65113, 1114112}; +static const wchar32 CAT_Pd_HYPHEN[] = {45, 46, 1418, 1419, 5120, 5121, 6150, 6151, 8208, 8210, 11799, 11800, 11802, 11803, 11840, 11841, 12448, 12449, 65123, 65124, 65293, 65294, 1114112}; +static const wchar32 CAT_Pe[] = {41, 42, 93, 94, 125, 126, 3899, 3900, 3901, 3902, 5788, 5789, 8262, 8263, 8318, 8319, 8334, 8335, 8969, 8970, 8971, 8972, 9002, 9003, 10089, 10090, 10091, 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, 10102, 10182, 10183, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10649, 10713, 10714, 10715, 10716, 10749, 10750, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11818, 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12306, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 12318, 12320, 64830, 64831, 65048, 65049, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65096, 65097, 65114, 65115, 65116, 65117, 65118, 65119, 65289, 65290, 65341, 65342, 65373, 65374, 65376, 65377, 65379, 65380, 1114112}; +static const wchar32 CAT_Pe_END[] = {41, 42, 93, 94, 125, 126, 3899, 3900, 3901, 3902, 5788, 5789, 8262, 8263, 8318, 8319, 8334, 8335, 8969, 8970, 8971, 8972, 9002, 9003, 10089, 10090, 10091, 10092, 10093, 10094, 10097, 10098, 10099, 10100, 10101, 10102, 10182, 10183, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10224, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10649, 10713, 10714, 10715, 10716, 10749, 10750, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11818, 12297, 12298, 12299, 12300, 12305, 12306, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12316, 64830, 64831, 65048, 65049, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65096, 65097, 65114, 65115, 65116, 65117, 65118, 65119, 65289, 65290, 65341, 65342, 65373, 65374, 65376, 65377, 1114112}; +static const wchar32 CAT_Pe_QUOTE[] = {10095, 10096, 12301, 12302, 12303, 12304, 12318, 12320, 65090, 65091, 65092, 65093, 65379, 65380, 1114112}; +static const wchar32 CAT_Pf[] = {187, 188, 8217, 8218, 8221, 8222, 8250, 8251, 11779, 11780, 11781, 11782, 11786, 11787, 11789, 11790, 11805, 11806, 11809, 11810, 1114112}; +static const wchar32 CAT_Pf_QUOTE[] = {187, 188, 8221, 8222, 8250, 8251, 11779, 11780, 11781, 11782, 11786, 11787, 11789, 11790, 11805, 11806, 11809, 11810, 1114112}; +static const wchar32 CAT_Pf_SINGLE_QUOTE[] = {8217, 8218, 1114112}; +static const wchar32 CAT_Pi[] = {171, 172, 8216, 8217, 8219, 8221, 8223, 8224, 8249, 8250, 11778, 11779, 11780, 11781, 11785, 11786, 11788, 11789, 11804, 11805, 11808, 11809, 1114112}; +static const wchar32 CAT_Pi_QUOTE[] = {171, 172, 8220, 8221, 8223, 8224, 8249, 8250, 11778, 11779, 11780, 11781, 11785, 11786, 11788, 11789, 11804, 11805, 11808, 11809, 1114112}; +static const wchar32 CAT_Pi_SINGLE_QUOTE[] = {8216, 8217, 8219, 8220, 1114112}; +static const wchar32 CAT_Po[] = {33, 36, 37, 40, 42, 43, 44, 45, 46, 48, 58, 60, 63, 65, 92, 93, 161, 162, 167, 168, 182, 184, 191, 192, 894, 895, 903, 904, 1370, 1376, 1417, 1418, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1548, 1550, 1563, 1564, 1566, 1568, 1642, 1646, 1748, 1749, 1792, 1806, 2039, 2042, 2096, 2111, 2142, 2143, 2404, 2406, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4969, 5741, 5743, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6150, 6151, 6155, 6468, 6470, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8214, 8216, 8224, 8232, 8240, 8249, 8251, 8255, 8257, 8260, 8263, 8274, 8275, 8276, 8277, 8287, 11513, 11517, 11518, 11520, 11632, 11633, 11776, 11778, 11782, 11785, 11787, 11788, 11790, 11799, 11800, 11802, 11803, 11804, 11806, 11808, 11818, 11823, 11824, 11834, 11836, 11840, 11841, 11842, 11843, 11845, 12289, 12292, 12349, 12350, 12539, 12540, 42238, 42240, 42509, 42512, 42611, 42612, 42622, 42623, 42738, 42744, 43124, 43128, 43214, 43216, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43616, 43742, 43744, 43760, 43762, 44011, 44012, 65040, 65047, 65049, 65050, 65072, 65073, 65093, 65095, 65097, 65101, 65104, 65107, 65108, 65112, 65119, 65122, 65128, 65129, 65130, 65132, 65281, 65284, 65285, 65288, 65290, 65291, 65292, 65293, 65294, 65296, 65306, 65308, 65311, 65313, 65340, 65341, 65377, 65378, 65380, 65382, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69703, 69710, 69819, 69821, 69822, 69826, 69952, 69956, 70004, 70006, 70085, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70200, 70206, 70313, 70314, 70731, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 71128, 71233, 71236, 71264, 71277, 71484, 71487, 72769, 72774, 72816, 72818, 74864, 74869, 92782, 92784, 92917, 92918, 92983, 92988, 92996, 92997, 113823, 113824, 121479, 121484, 125278, 125280, 1114112}; +static const wchar32 CAT_Po_EXTENDER[] = {183, 184, 11825, 11826, 1114112}; +static const wchar32 CAT_Po_HYPHEN[] = {12539, 12540, 65381, 65382, 1114112}; +static const wchar32 CAT_Po_OTHER[] = {35, 36, 37, 39, 42, 43, 47, 48, 64, 65, 92, 93, 161, 162, 167, 168, 182, 183, 191, 192, 1370, 1376, 1472, 1473, 1475, 1476, 1478, 1479, 1523, 1525, 1545, 1547, 1549, 1550, 1566, 1567, 1642, 1646, 1792, 1793, 1802, 1806, 2039, 2040, 2096, 2111, 2142, 2143, 2416, 2417, 2800, 2801, 3572, 3573, 3663, 3664, 3674, 3676, 3844, 3859, 3860, 3861, 3973, 3974, 4048, 4053, 4057, 4059, 4170, 4176, 4347, 4348, 4960, 4962, 4968, 4969, 5741, 5742, 5867, 5870, 5941, 5943, 6100, 6103, 6104, 6107, 6144, 6145, 6149, 6150, 6151, 6152, 6154, 6155, 6686, 6688, 6816, 6823, 6824, 6830, 7002, 7009, 7164, 7168, 7227, 7232, 7294, 7296, 7360, 7368, 7379, 7380, 8214, 8216, 8224, 8230, 8231, 8232, 8240, 8242, 8248, 8249, 8251, 8252, 8254, 8255, 8257, 8260, 8266, 8271, 8272, 8274, 8275, 8276, 8277, 8279, 8280, 8287, 11516, 11517, 11519, 11520, 11632, 11633, 11776, 11778, 11782, 11785, 11787, 11788, 11790, 11799, 11800, 11802, 11803, 11804, 11806, 11808, 11818, 11822, 11824, 11825, 11827, 11828, 11830, 11834, 11837, 11840, 11843, 11845, 12291, 12292, 12349, 12350, 42611, 42612, 42622, 42623, 42738, 42739, 43124, 43128, 43256, 43259, 43260, 43261, 43310, 43312, 43359, 43360, 43457, 43470, 43486, 43488, 43612, 43613, 43742, 43744, 43760, 43762, 44011, 44012, 65072, 65073, 65093, 65095, 65097, 65101, 65119, 65122, 65128, 65129, 65130, 65132, 65283, 65284, 65285, 65287, 65290, 65291, 65295, 65296, 65312, 65313, 65340, 65341, 65792, 65795, 66463, 66464, 66512, 66513, 66927, 66928, 67671, 67672, 67871, 67872, 67903, 67904, 68176, 68182, 68184, 68185, 68223, 68224, 68336, 68343, 68409, 68416, 68505, 68509, 69705, 69710, 69819, 69821, 69822, 69824, 69952, 69953, 70004, 70006, 70087, 70090, 70093, 70094, 70107, 70108, 70109, 70112, 70202, 70206, 70313, 70314, 70734, 70736, 70747, 70748, 70749, 70750, 70854, 70855, 71105, 71106, 71108, 71128, 71235, 71236, 71264, 71277, 71484, 71487, 72771, 72774, 72816, 72818, 74864, 74865, 74867, 74869, 92983, 92988, 92996, 92997, 121483, 121484, 1114112}; +static const wchar32 CAT_Po_QUOTE[] = {34, 35, 8243, 8245, 8246, 8248, 8279, 8280, 65282, 65283, 1114112}; +static const wchar32 CAT_Po_SINGLE_QUOTE[] = {39, 40, 8242, 8243, 8245, 8246, 65287, 65288, 1114112}; +static const wchar32 CAT_Po_TERMINAL[] = {33, 34, 44, 45, 46, 47, 58, 60, 63, 64, 894, 895, 903, 904, 1417, 1418, 1548, 1549, 1563, 1564, 1567, 1568, 1748, 1749, 1793, 1802, 2040, 2042, 2404, 2406, 4962, 4968, 5742, 5743, 6145, 6149, 6152, 6154, 6468, 6470, 8230, 8231, 8252, 8254, 8263, 8266, 8271, 8272, 11513, 11516, 11518, 11519, 11822, 11823, 11826, 11827, 11828, 11830, 11836, 11837, 11841, 11842, 12289, 12291, 42238, 42240, 42509, 42512, 42739, 42744, 43214, 43216, 43613, 43616, 65040, 65047, 65049, 65050, 65104, 65107, 65108, 65112, 65281, 65282, 65292, 65293, 65294, 65295, 65306, 65308, 65311, 65312, 65377, 65378, 65380, 65381, 68182, 68184, 69703, 69705, 69824, 69826, 69953, 69956, 70085, 70087, 70200, 70202, 70731, 70734, 71106, 71108, 71233, 71235, 72769, 72771, 74865, 74867, 92782, 92784, 92917, 92918, 113823, 113824, 121479, 121483, 125278, 125280, 1114112}; +static const wchar32 CAT_Ps[] = {40, 41, 91, 92, 123, 124, 3898, 3899, 3900, 3901, 5787, 5788, 8218, 8219, 8222, 8223, 8261, 8262, 8317, 8318, 8333, 8334, 8968, 8969, 8970, 8971, 9001, 9002, 10088, 10089, 10090, 10091, 10092, 10093, 10094, 10095, 10096, 10097, 10098, 10099, 10100, 10101, 10181, 10182, 10214, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10627, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10712, 10713, 10714, 10715, 10748, 10749, 11810, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 11842, 11843, 12296, 12297, 12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 12317, 12318, 64831, 64832, 65047, 65048, 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65089, 65090, 65091, 65092, 65095, 65096, 65113, 65114, 65115, 65116, 65117, 65118, 65288, 65289, 65339, 65340, 65371, 65372, 65375, 65376, 65378, 65379, 1114112}; +static const wchar32 CAT_Ps_QUOTE[] = {8222, 8223, 10094, 10095, 11842, 11843, 12300, 12301, 12302, 12303, 12317, 12318, 65089, 65090, 65091, 65092, 65378, 65379, 1114112}; +static const wchar32 CAT_Ps_SINGLE_QUOTE[] = {8218, 8219, 1114112}; +static const wchar32 CAT_Ps_START[] = {40, 41, 91, 92, 123, 124, 3898, 3899, 3900, 3901, 5787, 5788, 8261, 8262, 8317, 8318, 8333, 8334, 8968, 8969, 8970, 8971, 9001, 9002, 10088, 10089, 10090, 10091, 10092, 10093, 10096, 10097, 10098, 10099, 10100, 10101, 10181, 10182, 10214, 10215, 10216, 10217, 10218, 10219, 10220, 10221, 10222, 10223, 10627, 10628, 10629, 10630, 10631, 10632, 10633, 10634, 10635, 10636, 10637, 10638, 10639, 10640, 10641, 10642, 10643, 10644, 10645, 10646, 10647, 10648, 10712, 10713, 10714, 10715, 10748, 10749, 11810, 11811, 11812, 11813, 11814, 11815, 11816, 11817, 12296, 12297, 12298, 12299, 12304, 12305, 12308, 12309, 12310, 12311, 12312, 12313, 12314, 12315, 64831, 64832, 65047, 65048, 65077, 65078, 65079, 65080, 65081, 65082, 65083, 65084, 65085, 65086, 65087, 65088, 65095, 65096, 65113, 65114, 65115, 65116, 65117, 65118, 65288, 65289, 65339, 65340, 65371, 65372, 65375, 65376, 1114112}; +static const wchar32 CAT_S[] = {36, 37, 43, 44, 60, 63, 94, 95, 96, 97, 124, 125, 126, 127, 162, 167, 168, 170, 172, 173, 174, 178, 180, 181, 184, 185, 215, 216, 247, 248, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 1014, 1015, 1154, 1155, 1421, 1424, 1542, 1545, 1547, 1548, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2546, 2548, 2554, 2556, 2801, 2802, 2928, 2929, 3059, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3647, 3648, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6107, 6108, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 8260, 8261, 8274, 8275, 8314, 8317, 8330, 8333, 8352, 8383, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8473, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8512, 8517, 8522, 8526, 8527, 8528, 8586, 8588, 8592, 8968, 8972, 9001, 9003, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 10088, 10132, 10181, 10183, 10214, 10224, 10627, 10649, 10712, 10716, 10748, 10750, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12443, 12445, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 42752, 42775, 42784, 42786, 42889, 42891, 43048, 43052, 43062, 43066, 43639, 43642, 43867, 43868, 64297, 64298, 64434, 64450, 65020, 65022, 65122, 65123, 65124, 65127, 65129, 65130, 65284, 65285, 65291, 65292, 65308, 65311, 65342, 65343, 65344, 65345, 65372, 65373, 65374, 65375, 65504, 65511, 65512, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126704, 126706, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112}; +static const wchar32 CAT_Sc[] = {36, 37, 162, 166, 1423, 1424, 1547, 1548, 2546, 2548, 2555, 2556, 2801, 2802, 3065, 3066, 3647, 3648, 6107, 6108, 8352, 8383, 43064, 43065, 65020, 65021, 65129, 65130, 65284, 65285, 65504, 65506, 65509, 65511, 1114112}; +static const wchar32 CAT_Sc_CURRENCY[] = {36, 37, 162, 166, 1423, 1424, 1547, 1548, 2546, 2548, 2555, 2556, 2801, 2802, 3065, 3066, 3647, 3648, 6107, 6108, 8352, 8383, 43064, 43065, 65020, 65021, 65129, 65130, 65284, 65285, 65504, 65506, 65509, 65511, 1114112}; +static const wchar32 CAT_Sk[] = {94, 95, 96, 97, 168, 169, 175, 176, 180, 181, 184, 185, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 12443, 12445, 42752, 42775, 42784, 42786, 42889, 42891, 43867, 43868, 64434, 64450, 65342, 65343, 65344, 65345, 65507, 65508, 127995, 128000, 1114112}; +static const wchar32 CAT_Sk_MODIFIER[] = {94, 95, 96, 97, 168, 169, 175, 176, 180, 181, 184, 185, 706, 710, 722, 736, 741, 748, 749, 750, 751, 768, 885, 886, 900, 902, 8125, 8126, 8127, 8130, 8141, 8144, 8157, 8160, 8173, 8176, 8189, 8191, 12443, 12445, 42752, 42775, 42784, 42786, 42889, 42891, 43867, 43868, 64434, 64450, 65342, 65343, 65344, 65345, 65507, 65508, 127995, 128000, 1114112}; +static const wchar32 CAT_Sm[] = {43, 44, 60, 63, 124, 125, 126, 127, 172, 173, 177, 178, 215, 216, 247, 248, 1014, 1015, 1542, 1545, 8260, 8261, 8274, 8275, 8314, 8317, 8330, 8333, 8472, 8473, 8512, 8517, 8523, 8524, 8592, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8992, 8994, 9084, 9085, 9115, 9140, 9180, 9186, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10176, 10181, 10183, 10214, 10224, 10240, 10496, 10627, 10649, 10712, 10716, 10748, 10750, 11008, 11056, 11077, 11079, 11085, 64297, 64298, 65122, 65123, 65124, 65127, 65291, 65292, 65308, 65311, 65372, 65373, 65374, 65375, 65506, 65507, 65513, 65517, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 126704, 126706, 1114112}; +static const wchar32 CAT_Sm_MATH[] = {43, 44, 60, 63, 124, 125, 126, 127, 172, 173, 177, 178, 215, 216, 247, 248, 1014, 1015, 1542, 1545, 8260, 8261, 8274, 8275, 8314, 8315, 8316, 8317, 8330, 8331, 8332, 8333, 8472, 8473, 8512, 8517, 8523, 8524, 8592, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8722, 8723, 8960, 8992, 8994, 9084, 9085, 9115, 9140, 9180, 9186, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10176, 10181, 10183, 10214, 10224, 10240, 10496, 10627, 10649, 10712, 10716, 10748, 10750, 11008, 11056, 11077, 11079, 11085, 64297, 64298, 65122, 65123, 65124, 65127, 65291, 65292, 65308, 65311, 65372, 65373, 65374, 65375, 65506, 65507, 65513, 65517, 120513, 120514, 120539, 120540, 120571, 120572, 120597, 120598, 120629, 120630, 120655, 120656, 120687, 120688, 120713, 120714, 120745, 120746, 120771, 120772, 126704, 126706, 1114112}; +static const wchar32 CAT_Sm_MINUS[] = {8315, 8316, 8331, 8332, 8722, 8723, 1114112}; +static const wchar32 CAT_So[] = {166, 167, 169, 170, 174, 175, 176, 177, 1154, 1155, 1421, 1423, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2554, 2555, 2928, 2929, 3059, 3065, 3066, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8472, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8522, 8523, 8524, 8526, 8527, 8528, 8586, 8588, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, 8992, 8994, 9001, 9003, 9084, 9085, 9115, 9140, 9180, 9186, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10088, 10132, 10176, 10240, 10496, 11008, 11056, 11077, 11079, 11085, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 43048, 43052, 43062, 43064, 43065, 43066, 43639, 43642, 65021, 65022, 65508, 65509, 65512, 65513, 65517, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 127995, 128000, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112}; +static const wchar32 CAT_So_OTHER[] = {166, 167, 169, 170, 174, 175, 176, 177, 1154, 1155, 1421, 1423, 1550, 1552, 1758, 1759, 1769, 1770, 1789, 1791, 2038, 2039, 2554, 2555, 2928, 2929, 3059, 3065, 3066, 3067, 3199, 3200, 3407, 3408, 3449, 3450, 3841, 3844, 3859, 3860, 3861, 3864, 3866, 3872, 3892, 3893, 3894, 3895, 3896, 3897, 4030, 4038, 4039, 4045, 4046, 4048, 4053, 4057, 4254, 4256, 5008, 5018, 6464, 6465, 6622, 6656, 7009, 7019, 7028, 7037, 8448, 8450, 8451, 8455, 8456, 8458, 8468, 8469, 8470, 8472, 8478, 8484, 8485, 8486, 8487, 8488, 8489, 8490, 8494, 8495, 8506, 8508, 8522, 8523, 8524, 8526, 8527, 8528, 8586, 8588, 8597, 8602, 8604, 8608, 8609, 8611, 8612, 8614, 8615, 8622, 8623, 8654, 8656, 8658, 8659, 8660, 8661, 8692, 8960, 8968, 8972, 8992, 8994, 9001, 9003, 9084, 9085, 9115, 9140, 9180, 9186, 9215, 9216, 9255, 9280, 9291, 9372, 9450, 9472, 9655, 9656, 9665, 9666, 9720, 9728, 9839, 9840, 10088, 10132, 10176, 10240, 10496, 11008, 11056, 11077, 11079, 11085, 11124, 11126, 11158, 11160, 11194, 11197, 11209, 11210, 11218, 11244, 11248, 11493, 11499, 11904, 11930, 11931, 12020, 12032, 12246, 12272, 12284, 12292, 12293, 12306, 12308, 12320, 12321, 12342, 12344, 12350, 12352, 12688, 12690, 12694, 12704, 12736, 12772, 12800, 12831, 12842, 12872, 12880, 12881, 12896, 12928, 12938, 12977, 12992, 13055, 13056, 13312, 19904, 19968, 42128, 42183, 43048, 43052, 43062, 43064, 43065, 43066, 43639, 43642, 65021, 65022, 65508, 65509, 65512, 65513, 65517, 65519, 65532, 65534, 65847, 65856, 65913, 65930, 65932, 65935, 65936, 65948, 65952, 65953, 66000, 66045, 67703, 67705, 68296, 68297, 71487, 71488, 92988, 92992, 92997, 92998, 113820, 113821, 118784, 119030, 119040, 119079, 119081, 119141, 119146, 119149, 119171, 119173, 119180, 119210, 119214, 119273, 119296, 119362, 119365, 119366, 119552, 119639, 120832, 121344, 121399, 121403, 121453, 121461, 121462, 121476, 121477, 121479, 126976, 127020, 127024, 127124, 127136, 127151, 127153, 127168, 127169, 127184, 127185, 127222, 127248, 127279, 127280, 127340, 127344, 127405, 127462, 127491, 127504, 127548, 127552, 127561, 127568, 127570, 127744, 127995, 128000, 128723, 128736, 128749, 128752, 128759, 128768, 128884, 128896, 128981, 129024, 129036, 129040, 129096, 129104, 129114, 129120, 129160, 129168, 129198, 129296, 129311, 129312, 129320, 129328, 129329, 129331, 129343, 129344, 129356, 129360, 129375, 129408, 129426, 129472, 129473, 1114112}; +static const wchar32 CAT_Z[] = {32, 33, 160, 161, 5760, 5761, 8192, 8204, 8232, 8234, 8239, 8240, 8287, 8288, 12288, 12289, 1114112}; +static const wchar32 CAT_Zl[] = {8232, 8233, 1114112}; +static const wchar32 CAT_Zl_LINE[] = {8232, 8233, 1114112}; +static const wchar32 CAT_Zp[] = {8233, 8234, 1114112}; +static const wchar32 CAT_Zp_PARAGRAPH[] = {8233, 8234, 1114112}; +static const wchar32 CAT_Zs[] = {32, 33, 160, 161, 5760, 5761, 8192, 8204, 8239, 8240, 8287, 8288, 12288, 12289, 1114112}; +static const wchar32 CAT_Zs_SPACE[] = {32, 33, 160, 161, 5760, 5761, 8192, 8203, 8239, 8240, 8287, 8288, 12288, 12289, 1114112}; +static const wchar32 CAT_Zs_ZWSPACE[] = {8203, 8204, 1114112}; + +static const TCategoryRanges CATEGORY_RANGES[] = { + {1274, CAT_Cn_UNASSIGNED}, + {1255, CAT_Lu_UPPER}, + {1267, CAT_Ll_LOWER}, + {21, CAT_Lt_TITLE}, + {59, CAT_Lm_EXTENDER}, + {63, CAT_Lm_LETTER}, + {849, CAT_Lo_OTHER}, + {23, CAT_Lo_IDEOGRAPH}, + {13, CAT_Lo_KATAKANA}, + {7, CAT_Lo_HIRAGANA}, + {5, CAT_Lo_LEADING}, + {5, CAT_Lo_VOWEL}, + {5, CAT_Lo_TRAILING}, + {571, CAT_Mn_NONSPACING}, + {11, CAT_Me_ENCLOSING}, + {311, CAT_Mc_SPACING}, + {109, CAT_Nd_DIGIT}, + {19, CAT_Nl_LETTER}, + {7, CAT_Nl_IDEOGRAPH}, + {121, CAT_No_OTHER}, + {15, CAT_Zs_SPACE}, + {3, CAT_Zs_ZWSPACE}, + {3, CAT_Zl_LINE}, + {3, CAT_Zp_PARAGRAPH}, + {7, CAT_Cc_ASCII}, + {3, CAT_Cc_SPACE}, + {3, CAT_Cc_SEPARATOR}, + {43, CAT_Cf_FORMAT}, + {5, CAT_Cf_JOIN}, + {19, CAT_Cf_BIDI}, + {3, CAT_Cf_ZWNBSP}, + {3, CAT_Cs_LOW}, + {3, CAT_Cs_HIGH}, + {15, CAT_Pd_DASH}, + {23, CAT_Pd_HYPHEN}, + {131, CAT_Ps_START}, + {19, CAT_Ps_QUOTE}, + {131, CAT_Pe_END}, + {15, CAT_Pe_QUOTE}, + {21, CAT_Pi_QUOTE}, + {19, CAT_Pf_QUOTE}, + {13, CAT_Pc_CONNECTOR}, + {307, CAT_Po_OTHER}, + {11, CAT_Po_QUOTE}, + {129, CAT_Po_TERMINAL}, + {5, CAT_Po_EXTENDER}, + {5, CAT_Po_HYPHEN}, + {135, CAT_Sm_MATH}, + {7, CAT_Sm_MINUS}, + {35, CAT_Sc_CURRENCY}, + {59, CAT_Sk_MODIFIER}, + {349, CAT_So_OTHER}, + {3, CAT_Ps_SINGLE_QUOTE}, + {0, nullptr}, + {5, CAT_Pi_SINGLE_QUOTE}, + {3, CAT_Pf_SINGLE_QUOTE}, + {9, CAT_Po_SINGLE_QUOTE}, +}; + +struct TNamedCategoryRanges: THashMap<TStringBuf, TCategoryRanges> { + typedef THashMap<TStringBuf, TCategoryRanges> TBase; + + TNamedCategoryRanges() { + Insert(TStringBuf("C"), CAT_C, 1284); + Insert(TStringBuf("Cc"), CAT_Cc, 5); + Insert(TStringBuf("Cc_ASCII"), CAT_Cc_ASCII, 7); + Insert(TStringBuf("Cc_SEPARATOR"), CAT_Cc_SEPARATOR, 3); + Insert(TStringBuf("Cc_SPACE"), CAT_Cc_SPACE, 3); + Insert(TStringBuf("Cf"), CAT_Cf, 37); + Insert(TStringBuf("Cf_BIDI"), CAT_Cf_BIDI, 19); + Insert(TStringBuf("Cf_FORMAT"), CAT_Cf_FORMAT, 43); + Insert(TStringBuf("Cf_JOIN"), CAT_Cf_JOIN, 5); + Insert(TStringBuf("Cf_ZWNBSP"), CAT_Cf_ZWNBSP, 3); + Insert(TStringBuf("Cn"), CAT_Cn, 1274); + Insert(TStringBuf("Cn_UNASSIGNED"), CAT_Cn_UNASSIGNED, 1274); + Insert(TStringBuf("Co"), CAT_Co, 1274); + Insert(TStringBuf("Co_PRIVATE"), CAT_Co_PRIVATE, 1274); + Insert(TStringBuf("Cs"), CAT_Cs, 3); + Insert(TStringBuf("Cs_HIGH"), CAT_Cs_HIGH, 3); + Insert(TStringBuf("Cs_LOW"), CAT_Cs_LOW, 3); + Insert(TStringBuf("L"), CAT_L, 1143); + Insert(TStringBuf("Ll"), CAT_Ll, 1267); + Insert(TStringBuf("Ll_LOWER"), CAT_Ll_LOWER, 1267); + Insert(TStringBuf("Lm"), CAT_Lm, 115); + Insert(TStringBuf("Lm_EXTENDER"), CAT_Lm_EXTENDER, 59); + Insert(TStringBuf("Lm_LETTER"), CAT_Lm_LETTER, 63); + Insert(TStringBuf("Lo"), CAT_Lo, 891); + Insert(TStringBuf("Lo_HIRAGANA"), CAT_Lo_HIRAGANA, 7); + Insert(TStringBuf("Lo_IDEOGRAPH"), CAT_Lo_IDEOGRAPH, 23); + Insert(TStringBuf("Lo_KATAKANA"), CAT_Lo_KATAKANA, 13); + Insert(TStringBuf("Lo_LEADING"), CAT_Lo_LEADING, 5); + Insert(TStringBuf("Lo_OTHER"), CAT_Lo_OTHER, 849); + Insert(TStringBuf("Lo_TRAILING"), CAT_Lo_TRAILING, 5); + Insert(TStringBuf("Lo_VOWEL"), CAT_Lo_VOWEL, 5); + Insert(TStringBuf("Lt"), CAT_Lt, 21); + Insert(TStringBuf("Lt_TITLE"), CAT_Lt_TITLE, 21); + Insert(TStringBuf("Lu"), CAT_Lu, 1255); + Insert(TStringBuf("Lu_UPPER"), CAT_Lu_UPPER, 1255); + Insert(TStringBuf("M"), CAT_M, 501); + Insert(TStringBuf("Mc"), CAT_Mc, 311); + Insert(TStringBuf("Mc_SPACING"), CAT_Mc_SPACING, 311); + Insert(TStringBuf("Me"), CAT_Me, 11); + Insert(TStringBuf("Me_ENCLOSING"), CAT_Me_ENCLOSING, 11); + Insert(TStringBuf("Mn"), CAT_Mn, 571); + Insert(TStringBuf("Mn_NONSPACING"), CAT_Mn_NONSPACING, 571); + Insert(TStringBuf("N"), CAT_N, 231); + Insert(TStringBuf("Nd"), CAT_Nd, 109); + Insert(TStringBuf("Nd_DIGIT"), CAT_Nd_DIGIT, 109); + Insert(TStringBuf("Nl"), CAT_Nl, 25); + Insert(TStringBuf("Nl_IDEOGRAPH"), CAT_Nl_IDEOGRAPH, 7); + Insert(TStringBuf("Nl_LETTER"), CAT_Nl_LETTER, 19); + Insert(TStringBuf("No"), CAT_No, 121); + Insert(TStringBuf("No_OTHER"), CAT_No_OTHER, 121); + Insert(TStringBuf("P"), CAT_P, 337); + Insert(TStringBuf("Pc"), CAT_Pc, 13); + Insert(TStringBuf("Pc_CONNECTOR"), CAT_Pc_CONNECTOR, 13); + Insert(TStringBuf("Pd"), CAT_Pd, 35); + Insert(TStringBuf("Pd_DASH"), CAT_Pd_DASH, 15); + Insert(TStringBuf("Pd_HYPHEN"), CAT_Pd_HYPHEN, 23); + Insert(TStringBuf("Pe"), CAT_Pe, 145); + Insert(TStringBuf("Pe_END"), CAT_Pe_END, 131); + Insert(TStringBuf("Pe_QUOTE"), CAT_Pe_QUOTE, 15); + Insert(TStringBuf("Pf"), CAT_Pf, 21); + Insert(TStringBuf("Pf_QUOTE"), CAT_Pf_QUOTE, 19); + Insert(TStringBuf("Pf_SINGLE_QUOTE"), CAT_Pf_SINGLE_QUOTE, 3); + Insert(TStringBuf("Pi"), CAT_Pi, 23); + Insert(TStringBuf("Pi_QUOTE"), CAT_Pi_QUOTE, 21); + Insert(TStringBuf("Pi_SINGLE_QUOTE"), CAT_Pi_SINGLE_QUOTE, 5); + Insert(TStringBuf("Po"), CAT_Po, 331); + Insert(TStringBuf("Po_EXTENDER"), CAT_Po_EXTENDER, 5); + Insert(TStringBuf("Po_HYPHEN"), CAT_Po_HYPHEN, 5); + Insert(TStringBuf("Po_OTHER"), CAT_Po_OTHER, 307); + Insert(TStringBuf("Po_QUOTE"), CAT_Po_QUOTE, 11); + Insert(TStringBuf("Po_SINGLE_QUOTE"), CAT_Po_SINGLE_QUOTE, 9); + Insert(TStringBuf("Po_TERMINAL"), CAT_Po_TERMINAL, 129); + Insert(TStringBuf("Ps"), CAT_Ps, 151); + Insert(TStringBuf("Ps_QUOTE"), CAT_Ps_QUOTE, 19); + Insert(TStringBuf("Ps_SINGLE_QUOTE"), CAT_Ps_SINGLE_QUOTE, 3); + Insert(TStringBuf("Ps_START"), CAT_Ps_START, 131); + Insert(TStringBuf("S"), CAT_S, 437); + Insert(TStringBuf("Sc"), CAT_Sc, 35); + Insert(TStringBuf("Sc_CURRENCY"), CAT_Sc_CURRENCY, 35); + Insert(TStringBuf("Sk"), CAT_Sk, 59); + Insert(TStringBuf("Sk_MODIFIER"), CAT_Sk_MODIFIER, 59); + Insert(TStringBuf("Sm"), CAT_Sm, 129); + Insert(TStringBuf("Sm_MATH"), CAT_Sm_MATH, 135); + Insert(TStringBuf("Sm_MINUS"), CAT_Sm_MINUS, 7); + Insert(TStringBuf("So"), CAT_So, 349); + Insert(TStringBuf("So_OTHER"), CAT_So_OTHER, 349); + Insert(TStringBuf("Z"), CAT_Z, 17); + Insert(TStringBuf("Zl"), CAT_Zl, 3); + Insert(TStringBuf("Zl_LINE"), CAT_Zl_LINE, 3); + Insert(TStringBuf("Zp"), CAT_Zp, 3); + Insert(TStringBuf("Zp_PARAGRAPH"), CAT_Zp_PARAGRAPH, 3); + Insert(TStringBuf("Zs"), CAT_Zs, 15); + Insert(TStringBuf("Zs_SPACE"), CAT_Zs_SPACE, 15); + Insert(TStringBuf("Zs_ZWSPACE"), CAT_Zs_ZWSPACE, 3); + } + + inline void Insert(const TStringBuf& category, const wchar32* data, size_t count) { + TCategoryRanges categoryRanges; + categoryRanges.Count = count; + categoryRanges.Data = data; + TBase::insert(::std::make_pair(category, categoryRanges)); + } + + inline const TCategoryRanges& Get(const TStringBuf& category) const { + TBase::const_iterator i = TBase::find(category); + if (i == TBase::end()) + throw yexception() << "Unknown Unicode category name '" << category << "'"; + return i->second; + } +}; + +const TCategoryRanges& GetCategoryRanges(WC_TYPE cat) { + Y_ASSERT(static_cast<size_t>(cat) < Y_ARRAY_SIZE(CATEGORY_RANGES)); + return CATEGORY_RANGES[cat]; + +} + +const TCategoryRanges& GetCategoryRanges(const TStringBuf& category) { + return Default<TNamedCategoryRanges>().Get(category); +} + +} // NPrivate +} // NUnicode + diff --git a/library/cpp/unicode/set/quoted_pair.cpp b/library/cpp/unicode/set/quoted_pair.cpp new file mode 100644 index 0000000000..7675dbaa74 --- /dev/null +++ b/library/cpp/unicode/set/quoted_pair.cpp @@ -0,0 +1,53 @@ +#include "quoted_pair.h" + +#include <util/generic/strbuf.h> + +namespace NUnicode { + EUnicodeQuotedPairType ResolveUnicodeQuotedPair(wchar32 escapedSymbol, wchar32& symbol, TUnicodeSet& set) { + switch (escapedSymbol) { + case wchar32('a'): // \a -> U+0007 Bell + symbol = wchar32('\a'); + return UQPT_SYMBOL; + case wchar32('b'): // \b -> U+0008 Backspace + symbol = wchar32('\b'); + return UQPT_SYMBOL; + case wchar32('t'): // \t -> U+0009 Horizontal Tab + symbol = wchar32('\t'); + return UQPT_SYMBOL; + case wchar32('n'): // \n -> U+000A Line Feed + symbol = wchar32('\n'); + return UQPT_SYMBOL; + case wchar32('v'): // \v -> U+000B Vertical Tab + symbol = wchar32('\v'); + return UQPT_SYMBOL; + case wchar32('f'): // \f -> U+000C Form Feed + symbol = wchar32('\f'); + return UQPT_SYMBOL; + case wchar32('r'): // \r -> U+000D Carriage Return + symbol = wchar32('\r'); + return UQPT_SYMBOL; + case wchar32('s'): + set.AddCategory(TStringBuf("Z")); + return UQPT_SET; + case wchar32('S'): + set.Add(TUnicodeSet().AddCategory(TStringBuf("Z")).Invert()); + return UQPT_SET; + case wchar32('w'): + set.AddCategory(TStringBuf("L")); + return UQPT_SET; + case wchar32('W'): + set.Add(TUnicodeSet().AddCategory(TStringBuf("L")).Invert()); + return UQPT_SET; + case wchar32('d'): + set.AddCategory(TStringBuf("Nd")); + return UQPT_SET; + case wchar32('D'): + set.Add(TUnicodeSet().AddCategory(TStringBuf("Nd")).Invert()); + return UQPT_SET; + default: + symbol = escapedSymbol; + return UQPT_SYMBOL; + } + } + +} diff --git a/library/cpp/unicode/set/quoted_pair.h b/library/cpp/unicode/set/quoted_pair.h new file mode 100644 index 0000000000..a951071f8c --- /dev/null +++ b/library/cpp/unicode/set/quoted_pair.h @@ -0,0 +1,15 @@ +#pragma once + +#include "unicode_set.h" + +#include <util/system/defaults.h> + +namespace NUnicode { + enum EUnicodeQuotedPairType { + UQPT_SYMBOL, + UQPT_SET, + }; + + EUnicodeQuotedPairType ResolveUnicodeQuotedPair(wchar32 escapedSymbol, wchar32& symbol, TUnicodeSet& set); + +} diff --git a/library/cpp/unicode/set/set.cpp b/library/cpp/unicode/set/set.cpp new file mode 100644 index 0000000000..5f8caa3140 --- /dev/null +++ b/library/cpp/unicode/set/set.cpp @@ -0,0 +1,6 @@ +#include "set.h" + +#include "category_ranges.h" +#include "unicode_set_lexer.h" +#include "unicode_set_parser.h" +#include "unicode_set_token.h" diff --git a/library/cpp/unicode/set/set.h b/library/cpp/unicode/set/set.h new file mode 100644 index 0000000000..cad099694a --- /dev/null +++ b/library/cpp/unicode/set/set.h @@ -0,0 +1,4 @@ +#pragma once + +#include "unicode_set.h" +#include "quoted_pair.h" diff --git a/library/cpp/unicode/set/unicode_set.cpp b/library/cpp/unicode/set/unicode_set.cpp new file mode 100644 index 0000000000..855bcdd9a6 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set.cpp @@ -0,0 +1,480 @@ +#include "unicode_set.h" + +#include "category_ranges.h" +#include "unicode_set_parser.h" + +#include <util/ysaveload.h> +#include <util/charset/wide.h> +#include <util/digest/numeric.h> +#include <util/generic/buffer.h> +#include <util/generic/yexception.h> +#include <util/stream/format.h> +#include <util/stream/input.h> +#include <util/stream/output.h> +#include <util/string/cast.h> + +// The original idea of unicode set implementation was taken from the icu::UnicodeSet. +// UnicodeSet has a set of ranges [from, to), where upper boundary is exclusive. +// The list of ranges always has a terminal value CODEPOINT_HIGH at the end. + +namespace NUnicode { + namespace NPrivate { + inline wchar32 Bound(wchar32 c) { + return c < TUnicodeSet::CODEPOINT_HIGH ? c : TUnicodeSet::CODEPOINT_HIGH - 1; + } + + inline void CheckWcType(WC_TYPE c) { + if (static_cast<size_t>(c) >= CCL_NUM) { + throw yexception() << "Category ID must be less than CCL_NUM (" << static_cast<size_t>(CCL_NUM) << "), specified: " << static_cast<size_t>(c); + } + } + + } + + // Returns the smallest value i >= from such that 'c' < Ranges[i]. + // Some examples: + // GetRangeItem(c, 0) + // set Ranges[] c=0 1 3 4 7 8 + // === ============== =========== + // [] [0x110000] 0 0 0 0 0 0 + // [:Any:] [0, 0x110000] 1 1 1 1 1 1 + // [\u0000-\u0003] [0, 4, 0x110000] 1 1 1 2 2 2 + // [\u0004-\u0007] [4, 8, 0x110000] 0 0 0 1 1 2 + // + // So, if method returns an odd value then 'c' falls to the {Range[i-1],Range[i]} range. + size_t TUnicodeSet::GetRangeItem(wchar32 c, size_t from) const { + Y_ASSERT(Valid()); + Y_ASSERT(from < Length); + if (c < Ranges[from]) + return from; + size_t lo = from; + size_t hi = Length - 1; + if (lo >= hi || c >= Ranges[hi - 1]) { + return hi; + } + for (;;) { + size_t i = (lo + hi) >> 1; + if (i == lo) { + break; + } else if (c < Ranges[i]) { + hi = i; + } else { + lo = i; + } + } + return hi; + } + + wchar32* TUnicodeSet::EnsureCapacity(size_t capacity) { + if (capacity <= Capacity) { + return const_cast<wchar32*>(Ranges); + } + + TDynamicBuffer buf = new wchar32[capacity]; + Copy<const wchar32*, wchar32*>(Ranges, Ranges + Length, buf.Get()); + DoSwap(buf, DynBuffer); + Ranges = DynBuffer.Get(); + Capacity = capacity; + return DynBuffer.Get(); + } + + wchar32* TUnicodeSet::InsertRangeSlots(const size_t pos, const size_t count) { + Y_ASSERT(pos < Length); + wchar32* src = EnsureCapacity(Length + count) + Length - 1; + wchar32* dst = src + count; + for (size_t i = 0; i < Length - pos; ++i) { + *dst-- = *src--; + } + Length += count; + return src + 1; + } + + void TUnicodeSet::EraseRangeSlots(const size_t pos, const size_t count) { + Y_ASSERT(pos < Length); + Y_ASSERT(pos + count <= Length); + wchar32* dst = EnsureWritable() + pos; + wchar32* src = dst + count; + for (size_t i = 0; i < Length - pos - count; ++i) { + *dst++ = *src++; + } + Length -= count; + } + + TUnicodeSet::TUnicodeSet() + : Ranges(ShortBuffer) + , Length(0) + , Capacity(Y_ARRAY_SIZE(ShortBuffer)) + { + Clear(); + } + + TUnicodeSet::TUnicodeSet(const TUnicodeSet& s) + : Ranges(ShortBuffer) + , Length(0) + , Capacity(Y_ARRAY_SIZE(ShortBuffer)) + { + Set(s); + } + + // from, to - inclusive + TUnicodeSet::TUnicodeSet(wchar32 from, wchar32 to) + : Ranges(ShortBuffer) + , Length(0) + , Capacity(Y_ARRAY_SIZE(ShortBuffer)) + { + Set(from, to); + } + + TUnicodeSet::TUnicodeSet(const TWtringBuf& s) + : Ranges(ShortBuffer) + , Length(0) + , Capacity(Y_ARRAY_SIZE(ShortBuffer)) + { + Set(s); + } + + TUnicodeSet::TUnicodeSet(WC_TYPE c) + : Ranges(ShortBuffer) + , Length(0) + , Capacity(Y_ARRAY_SIZE(ShortBuffer)) + { + Set(c); + } + + void TUnicodeSet::AddPredefRanges(const NPrivate::TCategoryRanges& ranges) { + if (ranges.Count > 0) { + for (size_t i = 0; i + 1 < ranges.Count; i += 2) { + Add(ranges.Data[i], ranges.Data[i + 1] - 1); + } + } + } + + TUnicodeSet& TUnicodeSet::Add(const TUnicodeSet& s) { + if (Empty()) { + TUnicodeSet::operator=(s); + return *this; + } + for (size_t i = 0; i + 1 < s.Length; i += 2) { + Add(s.Ranges[i], s.Ranges[i + 1] - 1); + } + return *this; + } + + TUnicodeSet& TUnicodeSet::Add(const TWtringBuf& s) { + const wchar16* begin = s.data(); + const wchar16* end = s.data() + s.size(); + while (begin < end) { + Add(ReadSymbolAndAdvance(begin, end)); + } + return *this; + } + + TUnicodeSet& TUnicodeSet::Add(wchar32 c) { + c = NPrivate::Bound(c); + const size_t i = GetRangeItem(c); + if (i & 1) { + return *this; + } + if (c == Ranges[i] - 1) { // The char adjoins with the next range + if (i > 0 && Ranges[i - 1] == c) { // The char adjoins with the previous range too + if (i + 1 == Length) { // Don't delete the last TERMINAL + EraseRangeSlots(i - 1, 1); + } else { + EraseRangeSlots(i - 1, 2); // Collapse ranges + } + } else { + EnsureWritable()[i] = c; + } + } else if (i > 0 && Ranges[i - 1] == c) { + ++(EnsureWritable()[i - 1]); + } else { + wchar32* target = InsertRangeSlots(i, 2); + *target++ = c; + *target = c + 1; + } + Y_ASSERT(Valid()); + + return *this; + } + + TUnicodeSet& TUnicodeSet::Add(wchar32 from, wchar32 to) { + from = NPrivate::Bound(from); + to = NPrivate::Bound(to); + Y_ASSERT(from <= to); + if (to == from) { + return Add(to); + } else if (from > to) { + return *this; + } + + size_t i = GetRangeItem(from); + + if (to < Ranges[i]) { + if (i & 1) { + return *this; + } + if (i > 0 && Ranges[i - 1] == from) { + if (Ranges[i] == to + 1) { + if (i + 1 == Length) { + EraseRangeSlots(i - 1, 1); + } else { + EraseRangeSlots(i - 1, 2); + } + } else { + EnsureWritable()[i - 1] = to + 1; + } + } else if (Ranges[i] == to + 1) { + if (i + 1 == Length) { + *InsertRangeSlots(i, 1) = from; + } else { + EnsureWritable()[i] = from; + } + } else { + wchar32* target = InsertRangeSlots(i, 2); + *target++ = from; + *target = to + 1; + } + Y_ASSERT(Valid()); + return *this; + } + + size_t j = GetRangeItem(to, i); + Y_ASSERT(i < j); + + if (0 == (j & 1)) { // 'to' falls between ranges + if (Ranges[j] > to + 1) { + *InsertRangeSlots(j, 1) = to + 1; + } else if (j + 1 < Length) { // Exclude last TERMINAL element + Y_ASSERT(Ranges[j] == to + 1); + // The next range adjoins with the current one. Join them + ++j; + } + } + + if (0 == (i & 1)) { // 'from' falls between ranges + if (i > 0 && Ranges[i - 1] == from) { + --i; + } else { + *InsertRangeSlots(i, 1) = from; + ++i; + ++j; + } + } + + // Erase ranges, which are covered by the new one + Y_ASSERT(i <= j); + Y_ASSERT(i <= Length); + Y_ASSERT(j <= Length); + EraseRangeSlots(i, j - i); + + Y_ASSERT(Valid()); + return *this; + } + + TUnicodeSet& TUnicodeSet::Add(WC_TYPE c) { + NPrivate::CheckWcType(c); + if (Empty()) { + return Set(c); + } + AddPredefRanges(NPrivate::GetCategoryRanges(c)); + return *this; + } + + TUnicodeSet& TUnicodeSet::AddCategory(const TStringBuf& catName) { + if (Empty()) { + return SetCategory(catName); + } + AddPredefRanges(NPrivate::GetCategoryRanges(catName)); + return *this; + } + + void TUnicodeSet::SetPredefRanges(const NPrivate::TCategoryRanges& ranges) { + Clear(); + if (ranges.Count > 0) { + DynBuffer.Drop(); + Ranges = ranges.Data; + Length = ranges.Count; + Capacity = 0; + } + } + + TUnicodeSet& TUnicodeSet::Set(const TUnicodeSet& s) { + if (0 == s.Capacity) { + DynBuffer.Drop(); + Ranges = s.Ranges; + Length = s.Length; + Capacity = 0; + } else if (s.Ranges == s.DynBuffer.Get()) { + DynBuffer = s.DynBuffer; + Ranges = DynBuffer.Get(); + Length = s.Length; + Capacity = s.Capacity; + } else { + ::Copy(s.Ranges, s.Ranges + s.Length, EnsureCapacity(s.Length)); + Length = s.Length; + } + return *this; + } + + TUnicodeSet& TUnicodeSet::Set(wchar32 from, wchar32 to) { + from = NPrivate::Bound(from); + to = NPrivate::Bound(to); + Y_ASSERT(from <= to); + + Clear(); + + if (to == from) { + return Add(to); + } else if (from > to) { + return *this; + } + + if (to + 1 != CODEPOINT_HIGH) { + wchar32* target = InsertRangeSlots(0, 2); + *target++ = from; + *target = to + 1; + } else { + *InsertRangeSlots(0, 1) = from; + } + Y_ASSERT(Valid()); + return *this; + } + + TUnicodeSet& TUnicodeSet::Set(const TWtringBuf& s) { + Clear(); + return Add(s); + } + + TUnicodeSet& TUnicodeSet::Set(WC_TYPE c) { + NPrivate::CheckWcType(c); + SetPredefRanges(NPrivate::GetCategoryRanges(c)); + return *this; + } + + TUnicodeSet& TUnicodeSet::SetCategory(const TStringBuf& catName) { + SetPredefRanges(NPrivate::GetCategoryRanges(catName)); + return *this; + } + + TUnicodeSet& TUnicodeSet::Invert() { + Y_ASSERT(Valid()); + if (0 == Ranges[0]) { + EraseRangeSlots(0, 1); + } else { + *InsertRangeSlots(0, 1) = 0; + } + return *this; + } + + TUnicodeSet& TUnicodeSet::MakeCaseInsensitive() { + TVector<wchar32> oldRanges(Ranges, Ranges + Length); + for (size_t i = 0; i + 1 < oldRanges.size(); i += 2) { + for (wchar32 c = oldRanges[i]; c < oldRanges[i + 1]; ++c) { + const ::NUnicode::NPrivate::TProperty& p = ::NUnicode::NPrivate::CharProperty(c); + if (p.Lower) { + Add(static_cast<wchar32>(c + p.Lower)); + } + if (p.Upper) { + Add(static_cast<wchar32>(c + p.Upper)); + } + if (p.Title) { + Add(static_cast<wchar32>(c + p.Title)); + } + } + } + return *this; + } + + TUnicodeSet& TUnicodeSet::Clear() { + if (IsStatic() || IsShared()) { + DynBuffer.Drop(); + ShortBuffer[0] = CODEPOINT_HIGH; + Capacity = Y_ARRAY_SIZE(ShortBuffer); + Ranges = ShortBuffer; + } else { + const_cast<wchar32*>(Ranges)[0] = CODEPOINT_HIGH; + } + Length = 1; + return *this; + } + + size_t TUnicodeSet::Hash() const { + size_t res = 0; + for (size_t i = 0; i < Length; ++i) { + res = ::CombineHashes(size_t(Ranges[i]), res); + } + return res; + } + + inline void WriteUnicodeChar(IOutputStream& out, wchar32 c, bool needEscape = false) { + switch (c) { + case wchar32('-'): + case wchar32('\\'): + case wchar32('^'): + needEscape = true; + break; + default: + break; + } + if (::IsGraph(c) && !needEscape) { + char buf[4]; // Max utf8 char length is 4 + size_t wr = 0; + WideToUTF8(&c, 1, buf, wr); + Y_ASSERT(wr <= Y_ARRAY_SIZE(buf)); + out.Write(buf, wr); + } else { + TString hexRepr = IntToString<16>(c); + if (c >> 8 == 0) { + out << "\\x" << LeftPad(hexRepr, 2, '0'); + } else if (c >> 16 == 0) { + out << "\\u" << LeftPad(hexRepr, 4, '0'); + } else { + out << "\\U" << LeftPad(hexRepr, 8, '0'); + } + } + } + + TString TUnicodeSet::ToString(bool escapeAllChars /* = false*/) const { + Y_ASSERT(Valid()); + TStringStream str; + str.Reserve(Length * 4 + Length / 2 + 2); + + str.Write('['); + for (size_t i = 0; i + 1 < Length; i += 2) { + WriteUnicodeChar(str, Ranges[i], escapeAllChars); + if (Ranges[i] + 1 < Ranges[i + 1]) { + // Don't write dash for two-symbol ranges + if (Ranges[i] + 2 < Ranges[i + 1]) { + str.Write('-'); + } + WriteUnicodeChar(str, Ranges[i + 1] - 1, escapeAllChars); + } + } + str.Write(']'); + + return str.Str(); + } + + void TUnicodeSet::Save(IOutputStream* out) const { + ::SaveSize(out, Length); + ::SaveArray(out, Ranges, Length); + } + + void TUnicodeSet::Load(IInputStream* in) { + const size_t length = ::LoadSize(in); + if (length > 0) { + ::LoadArray(in, EnsureCapacity(length), length); + } + Length = length; + if (!Valid()) { + ythrow TSerializeException() << "Loaded broken unicode set"; + } + } + + TUnicodeSet& TUnicodeSet::Parse(const TWtringBuf& data) { + Clear(); + NPrivate::ParseUnicodeSet(*this, data); + return *this; + } + +} diff --git a/library/cpp/unicode/set/unicode_set.h b/library/cpp/unicode/set/unicode_set.h new file mode 100644 index 0000000000..e573e05143 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set.h @@ -0,0 +1,154 @@ +#pragma once + +#include <util/str_stl.h> +#include <util/charset/unidata.h> +#include <util/generic/algorithm.h> +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/utility.h> +#include <util/generic/vector.h> + +class IInputStream; +class IOutputStream; + +namespace NUnicode { + namespace NPrivate { + struct TCategoryRanges; + } + + class TUnicodeSet { + private: + typedef TSimpleSharedPtr<wchar32, TDeleteArray> TDynamicBuffer; + + // Ranges can point to: + // 1) ShortBuffer for short sets (not more than 2 ranges) + // 2) static data (for predefined unicode categories) + // 3) or DynBuffer for big sets + const wchar32* Ranges; + wchar32 ShortBuffer[5]; + TDynamicBuffer DynBuffer; // Can be shared between multiple sets + size_t Length; // Number of slots in Ranges + size_t Capacity; // Capacity of currently used buffer. Zero value means reference to static data + + private: + Y_FORCE_INLINE bool IsShared() const { + return Ranges == DynBuffer.Get() && DynBuffer.RefCount() > 1; + } + + Y_FORCE_INLINE bool IsStatic() const { + return 0 == Capacity; + } + + size_t GetRangeItem(wchar32 c, size_t from = 0) const; + + // Extends buffer capacity if required and returns pointer to the writable buffer of slots + wchar32* EnsureCapacity(size_t capacity); + + // Makes the copy of buffer if the unicode set points to the static or shared data, and returns pointer to the writable buffer of slots + wchar32* EnsureWritable() { + if (IsShared()) { + // If multiple UnicodeSets refer to the same buffer then make the copy + Capacity = 0; + } + if (IsStatic()) { + // Copy static or shared data to own buffer before modifying + return EnsureCapacity(Length); + } + return const_cast<wchar32*>(Ranges); + } + + // Returns pointer to the first inserted slot + wchar32* InsertRangeSlots(const size_t pos, const size_t count); + void EraseRangeSlots(const size_t pos, const size_t count); + + void AddPredefRanges(const NPrivate::TCategoryRanges& ranges); + void SetPredefRanges(const NPrivate::TCategoryRanges& ranges); + + public: + enum { + CODEPOINT_HIGH = 0x110000 // Next value after maximum valid code point + }; + + TUnicodeSet(); + TUnicodeSet(const TUnicodeSet& s); + // Unicode set for specific character range. "from", "to" are inclusive + TUnicodeSet(wchar32 from, wchar32 to); + // Unicode set consists of all characters from the specified string + TUnicodeSet(const TWtringBuf& s); + // Unicode set for predefined category + TUnicodeSet(WC_TYPE c); + + TUnicodeSet& operator=(const TUnicodeSet& s) { + return Set(s); + } + + inline bool operator==(const TUnicodeSet& s) const { + return Length == s.Length && (Ranges == s.Ranges || ::Equal(Ranges, Ranges + Length, s.Ranges)); + } + + friend inline TUnicodeSet operator~(TUnicodeSet s) { + return s.Invert(); + } + + friend inline TUnicodeSet operator+(const TUnicodeSet& s1, const TUnicodeSet& s2) { + return TUnicodeSet(s1).Add(s2); + } + + TUnicodeSet& Add(const TUnicodeSet& s); + TUnicodeSet& Add(const TWtringBuf& s); + TUnicodeSet& Add(wchar32 c); + // from, to - inclusive + TUnicodeSet& Add(wchar32 from, wchar32 to); + TUnicodeSet& Add(WC_TYPE c); + // Add unicode category by name (one- or two-letter) + TUnicodeSet& AddCategory(const TStringBuf& catName); + + TUnicodeSet& Set(const TUnicodeSet& s); + // from, to - inclusive + TUnicodeSet& Set(wchar32 from, wchar32 to); + TUnicodeSet& Set(const TWtringBuf& s); + TUnicodeSet& Set(WC_TYPE c); + TUnicodeSet& SetCategory(const TStringBuf& catName); + + TUnicodeSet& Invert(); + // Converts existing unicode set to the case-insensitive set + TUnicodeSet& MakeCaseInsensitive(); + TUnicodeSet& Clear(); + + size_t Hash() const; + TString ToString(bool escapeAllChars = false) const; + + inline bool Valid() const { + return Length > 0 && Ranges[Length - 1] == CODEPOINT_HIGH; + } + + inline bool Has(wchar32 c) const { + if (Y_UNLIKELY(c >= CODEPOINT_HIGH)) { + return false; + } + const size_t item = GetRangeItem(c); + return (item & 1); + } + + inline bool Empty() const { + Y_ASSERT(Valid()); + return Length < 2; + } + + void Save(IOutputStream* out) const; + void Load(IInputStream* in); + + TUnicodeSet& Parse(const TWtringBuf& data); + }; + + using TUnicodeSetPtr = TSimpleSharedPtr<TUnicodeSet>; + +} + +template <> +struct THash<NUnicode::TUnicodeSet> { + size_t operator()(const NUnicode::TUnicodeSet& s) const { + return s.Hash(); + } +}; diff --git a/library/cpp/unicode/set/unicode_set_lexer.h b/library/cpp/unicode/set/unicode_set_lexer.h new file mode 100644 index 0000000000..c584f317fc --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_lexer.h @@ -0,0 +1,49 @@ +#pragma once + +#include "unicode_set_token.h" + +#include <util/generic/strbuf.h> +#include <util/system/yassert.h> + +namespace NUnicode { + namespace NPrivate { + class TUnicodeSetLexer { + private: + const TWtringBuf& Data; + + int cs; + //int* stack; + //int top; + int act; + const wchar16* ts; + const wchar16* te; + const wchar16* p; + const wchar16* pe; + const wchar16* eof; + + TUnicodeSetToken LastToken; + bool UseLast; + + private: + EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type); + EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type, wchar16 symbol); + EUnicodeSetTokenType YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize); + void Reset(); + + public: + explicit TUnicodeSetLexer(const TWtringBuf& data); + + EUnicodeSetTokenType GetToken(); + + const TUnicodeSetToken& GetLastToken() { + return LastToken; + } + + inline void PushBack() { + Y_VERIFY(!UseLast, "Double TUnicodeSetLexer::PushBack()"); + UseLast = true; + } + }; + + } +} diff --git a/library/cpp/unicode/set/unicode_set_lexer.rl6 b/library/cpp/unicode/set/unicode_set_lexer.rl6 new file mode 100644 index 0000000000..ebbc131556 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_lexer.rl6 @@ -0,0 +1,125 @@ +#include <library/cpp/unicode/set/unicode_set_lexer.h> + +#include <util/generic/yexception.h> + +namespace NUnicode { +namespace NPrivate { + +%%{ + machine unicode_set_lexer; + + alphtype unsigned short; + + action IncorrectCategoryError { + throw yexception() << "incorrect category"; + } + + action IncorrectEscapedCodepointError { + throw yexception() << "incorrect escaped codepoint"; + } + + action IncorrectQuotedPairError { + throw yexception() << "incorrect quoted pair"; + } + + id = alpha (alnum | '_')*; + escape = [%\\]; + + category = (':' id ':') <>^IncorrectCategoryError; + xdigit8 = xdigit{8} @^IncorrectEscapedCodepointError; + xdigit4 = xdigit{4} @^IncorrectEscapedCodepointError; + xdigit2 = xdigit{2} @^IncorrectEscapedCodepointError; + symbol = any @^IncorrectQuotedPairError; + + main := |* + '^' => { + return YieldToken(USTT_NEGATION); + }; + '-' => { + return YieldToken(USTT_RANGE); + }; + '[' => { + return YieldToken(USTT_LBRACKET); + }; + ']' => { + return YieldToken(USTT_RBRACKET); + }; + category => { + return YieldToken(USTT_CATEGORY, ts + 1, te - ts -2); + }; + escape 'U' xdigit8 => { + return YieldToken(USTT_CODEPOINT32, ts + 2, 8); + }; + escape 'u' xdigit4 => { + return YieldToken(USTT_CODEPOINT16, ts + 2, 4); + }; + escape 'x' xdigit2 => { + return YieldToken(USTT_CODEPOINT8, ts + 2, 2); + }; + escape symbol => { + return YieldToken(USTT_QUOTED_PAIR, *(ts + 1)); + }; + any => { + return YieldToken(USTT_SYMBOL, *ts); + }; + *|; + +}%% + +namespace { + +%% write data; + +} + +TUnicodeSetLexer::TUnicodeSetLexer(const TWtringBuf& data) + : Data(data) + , cs(0) + , act(0) + , ts(NULL) + , te(NULL) + , p(Data.data()) + , pe(Data.data() + Data.size()) + , eof(pe) + , UseLast(false) +{ + %% write init; +} + +EUnicodeSetTokenType TUnicodeSetLexer::GetToken() { + if (UseLast) { + UseLast = false; + return LastToken.Type; + } + + %% write exec; + + return YieldToken(USTT_EOS); +} + +EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type) { + Reset(); + LastToken = TUnicodeSetToken(type); + return type; +} + +EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, wchar16 symbol) { + Reset(); + LastToken = TUnicodeSetToken(type, symbol); + return type; +} + +EUnicodeSetTokenType TUnicodeSetLexer::YieldToken(EUnicodeSetTokenType type, const wchar16* dataBegin, size_t dataSize) { + Reset(); + LastToken = TUnicodeSetToken(type, dataBegin, dataSize); + return type; +} + +void TUnicodeSetLexer::Reset() { + p = te; + ts = NULL; + te = NULL; +} + +} // NPrivate +} // NUnicode diff --git a/library/cpp/unicode/set/unicode_set_parser.cpp b/library/cpp/unicode/set/unicode_set_parser.cpp new file mode 100644 index 0000000000..754c7add41 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_parser.cpp @@ -0,0 +1,109 @@ +#include "quoted_pair.h" +#include "unicode_set_lexer.h" + +#include <util/string/cast.h> +#include <util/charset/wide.h> + +namespace NUnicode { + namespace NPrivate { +#define UNEXPECTED_TOKEN throw yexception() << "Unexpected token: " << lexer.GetLastToken() + +#define EXPECT_TOKEN(type) \ + if (lexer.GetToken() != type) { \ + UNEXPECTED_TOKEN; \ + } + + void ParseUnicodeSet(TUnicodeSet& set, TUnicodeSetLexer& lexer); + + void ParseCharSequence(TUnicodeSet& set, TUnicodeSetLexer& lexer) { + wchar32 prevChar = 0; + bool range = false; + for (EUnicodeSetTokenType type = lexer.GetToken(); type != USTT_RBRACKET; type = lexer.GetToken()) { + wchar32 curChar = 0; + switch (type) { + case USTT_SYMBOL: + curChar = lexer.GetLastToken().Symbol; + break; + case USTT_NEGATION: + curChar = '^'; + break; + case USTT_QUOTED_PAIR: + ResolveUnicodeQuotedPair(lexer.GetLastToken().Symbol, curChar, set); + break; + case USTT_CODEPOINT8: + case USTT_CODEPOINT16: + case USTT_CODEPOINT32: + curChar = IntFromString<ui32, 16>(lexer.GetLastToken().Data); + if (curChar >= TUnicodeSet::CODEPOINT_HIGH) { + throw yexception() << "Invalid unicode codepoint: " << lexer.GetLastToken(); + } + break; + case USTT_RANGE: + if (0 == prevChar) { + UNEXPECTED_TOKEN; + } + range = true; + continue; + case USTT_LBRACKET: { + lexer.PushBack(); + TUnicodeSet inner; + ParseUnicodeSet(inner, lexer); + set.Add(inner); + break; + } + default: + UNEXPECTED_TOKEN; + } + if (curChar) { + if (range) { + if (prevChar >= curChar) { + throw yexception() << "Invalid character range"; + } + set.Add(prevChar, curChar); + curChar = 0; + } else { + set.Add(curChar); + } + } else if (range) { + UNEXPECTED_TOKEN; + } + range = false; + prevChar = curChar; + } + if (range) { + UNEXPECTED_TOKEN; + } + lexer.PushBack(); + } + + void ParseUnicodeSet(TUnicodeSet& set, TUnicodeSetLexer& lexer) { + EXPECT_TOKEN(USTT_LBRACKET); + bool invert = false; + if (USTT_NEGATION == lexer.GetToken()) { + invert = true; + } else { + lexer.PushBack(); + } + + if (USTT_CATEGORY == lexer.GetToken()) { + set.AddCategory(WideToUTF8(lexer.GetLastToken().Data)); + } else { + lexer.PushBack(); + ParseCharSequence(set, lexer); + } + + EXPECT_TOKEN(USTT_RBRACKET); + + if (invert) { + set.Invert(); + } + } + + void ParseUnicodeSet(TUnicodeSet& set, const TWtringBuf& data) { + TUnicodeSetLexer lexer(data); + ParseUnicodeSet(set, lexer); + EXPECT_TOKEN(USTT_EOS); + } + + } // NPrivate +} diff --git a/library/cpp/unicode/set/unicode_set_parser.h b/library/cpp/unicode/set/unicode_set_parser.h new file mode 100644 index 0000000000..64516ae23a --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_parser.h @@ -0,0 +1,11 @@ +#pragma once + +#include "unicode_set.h" + +#include <util/generic/strbuf.h> + +namespace NUnicode { + namespace NPrivate { + void ParseUnicodeSet(TUnicodeSet& set, const TWtringBuf& data); + } +} diff --git a/library/cpp/unicode/set/unicode_set_token.cpp b/library/cpp/unicode/set/unicode_set_token.cpp new file mode 100644 index 0000000000..525e3dea29 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_token.cpp @@ -0,0 +1 @@ +#include "unicode_set_token.h" diff --git a/library/cpp/unicode/set/unicode_set_token.h b/library/cpp/unicode/set/unicode_set_token.h new file mode 100644 index 0000000000..6fa619f757 --- /dev/null +++ b/library/cpp/unicode/set/unicode_set_token.h @@ -0,0 +1,68 @@ +#pragma once + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/stream/output.h> + +namespace NUnicode { + namespace NPrivate { + enum EUnicodeSetTokenType { + USTT_EOS /* "eos" */, + USTT_SYMBOL /* "symbol" */, + USTT_QUOTED_PAIR /* "quoted-pair" */, + USTT_CODEPOINT8 /* "codepoint8" */, + USTT_CODEPOINT16 /* "codepoint16" */, + USTT_CODEPOINT32 /* "codepoint32" */, + USTT_CATEGORY /* "category" */, + USTT_NEGATION /* "negation" */, + USTT_RANGE /* "range" */, + USTT_LBRACKET /* "lbracket" */, + USTT_RBRACKET /* "rbracket" */, + }; + + struct TUnicodeSetToken { + EUnicodeSetTokenType Type; + wchar16 Symbol; + TWtringBuf Data; + + explicit TUnicodeSetToken() + : Type(USTT_EOS) + , Symbol(0) + , Data() + { + } + + explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType) + : Type(tokenType) + , Symbol(0) + , Data() + { + } + + explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType, wchar16 symbol) + : Type(tokenType) + , Symbol(symbol) + , Data() + { + } + + explicit TUnicodeSetToken(EUnicodeSetTokenType tokenType, const wchar16* dataBegin, size_t dataSize) + : Type(tokenType) + , Symbol(0) + , Data(dataBegin, dataSize) + { + } + }; + + } +} + +Y_DECLARE_OUT_SPEC(inline, NUnicode::NPrivate::TUnicodeSetToken, output, token) { + output << token.Type; + if (token.Symbol) { + output << ":" << TUtf16String(1, token.Symbol).Quote(); + } + if (!token.Data.empty()) { + output << ":" << TUtf16String(token.Data).Quote(); + } +} |