diff options
author | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
---|---|---|
committer | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
commit | 22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch) | |
tree | bffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/token/accent.cpp | |
parent | 332b99e2173f0425444abb759eebcb2fafaa9209 (diff) | |
download | ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz |
validate canons without yatest_common
Diffstat (limited to 'library/cpp/token/accent.cpp')
-rw-r--r-- | library/cpp/token/accent.cpp | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/library/cpp/token/accent.cpp b/library/cpp/token/accent.cpp new file mode 100644 index 0000000000..be2797b089 --- /dev/null +++ b/library/cpp/token/accent.cpp @@ -0,0 +1,31 @@ +#include "charfilter.h" + +namespace { + struct TRange { + wchar16 First; + wchar16 Last; + }; +} + +TAccentTable::TAccentTable() { + // values of yc_80 copied from "library/cpp/tokenizer/charclasses_16.rl" + TRange ranges[] = { + {0x0300, 0x0357}, {0x035D, 0x036F}, {0x0483, 0x0486}, {0x0488, 0x0489}, {0x0591, 0x05A1}, {0x05A3, 0x05B9}, {0x05BB, 0x05BD}, {0x05BF, 0x0000}, {0x05C1, 0x05C2}, {0x05C4, 0x0000}, {0x0610, 0x0615}, {0x064B, 0x0658}, {0x0670, 0x0000}, {0x06D6, 0x06DC}, {0x06DE, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, {0x0711, 0x0000}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, {0x0901, 0x0903}, {0x093C, 0x0000}, {0x093E, 0x094D}, {0x0951, 0x0954}, {0x0962, 0x0963}, {0x0981, 0x0983}, {0x09BC, 0x0000}, {0x09BE, 0x09C4}, {0x09C7, 0x09C8}, {0x09CB, 0x09CD}, {0x09D7, 0x0000}, {0x09E2, 0x09E3}, {0x0A01, 0x0A03}, {0x0A3C, 0x0000}, {0x0A3E, 0x0A42}, {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A70, 0x0A71}, {0x0A81, 0x0A83}, {0x0ABC, 0x0000}, {0x0ABE, 0x0AC5}, {0x0AC7, 0x0AC9}, {0x0ACB, 0x0ACD}, {0x0AE2, 0x0AE3}, {0x0B01, 0x0B03}, {0x0B3C, 0x0000}, {0x0B3E, 0x0B43}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D}, {0x0B56, 0x0B57}, {0x0B82, 0x0000}, {0x0BBE, 0x0BC2}, {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD7, 0x0000}, {0x0C01, 0x0C03}, {0x0C3E, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D}, {0x0C55, 0x0C56}, {0x0C82, 0x0C83}, {0x0CBC, 0x0000}, {0x0CBE, 0x0CC4}, {0x0CC6, 0x0CC8}, {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0D02, 0x0D03}, {0x0D3E, 0x0D43}, {0x0D46, 0x0D48}, {0x0D4A, 0x0D4D}, {0x0D57, 0x0000}, {0x0D82, 0x0D83}, {0x0DCA, 0x0000}, {0x0DCF, 0x0DD4}, {0x0DD6, 0x0000}, {0x0DD8, 0x0DDF}, {0x0DF2, 0x0DF3}, {0x0E31, 0x0000}, {0x0E34, 0x0E3A}, {0x0E47, 0x0E4E}, {0x0EB1, 0x0000}, {0x0EB4, 0x0EB9}, {0x0EBB, 0x0EBC}, {0x0EC8, 0x0ECD}, {0x0F18, 0x0F19}, {0x0F35, 0x0000}, {0x0F37, 0x0000}, {0x0F39, 0x0000}, {0x0F3E, 0x0F3F}, {0x0F71, 0x0F84}, {0x0F86, 0x0F87}, {0x0F90, 0x0F97}, {0x0F99, 0x0FBC}, {0x0FC6, 0x0000}, {0x102C, 0x1032}, {0x1036, 0x1039}, {0x1056, 0x1059}, {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B6, 0x17D3}, {0x17DD, 0x0000}, {0x180B, 0x180D}, {0x18A9, 0x0000}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x20D0, 0x20EA}, {0x302A, 0x302F}, {0x3099, 0x309A}, {0xFB1E, 0x0000}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE23}}; + + TRange* const e = ranges + Y_ARRAY_SIZE(ranges); + + // @todo remove this line for static Data + memset(Data, 0, DATA_SIZE); + + for (TRange* r = ranges; r != e; ++r) { + if (r->Last) { + for (wchar16 c = r->First; c <= r->Last; ++c) { + Y_ASSERT((int)c < DATA_SIZE); + Data[c] = 1; + } + } else { + Y_ASSERT((int)r->First < DATA_SIZE); + Data[r->First] = 1; + } + } +} |