diff options
author | galtsev <galtsev@yandex-team.com> | 2022-09-26 23:57:03 +0300 |
---|---|---|
committer | galtsev <galtsev@yandex-team.com> | 2022-09-26 23:57:03 +0300 |
commit | 22378bbd93b81795dcd66d084d843b0276190a76 (patch) | |
tree | b89f157fb8d595dba7a52e896bad2510f36019a8 | |
parent | c97ae34b5d36b0e1bd72bb769c23e6a83e69703a (diff) | |
download | ydb-22378bbd93b81795dcd66d084d843b0276190a76.tar.gz |
Move the lexer's control values (Control, ControlMask) out of the Unicode code point range, i.e. above U+10FFFF, so that PIRE's lexer accepts any Unicode code point.
Issue:
-rw-r--r-- | contrib/libs/pire/pire/re_lexer.h | 4 | ||||
-rw-r--r-- | contrib/libs/pire/ut/read_unicode_ut.cpp | 9 |
2 files changed, 11 insertions, 2 deletions
diff --git a/contrib/libs/pire/pire/re_lexer.h b/contrib/libs/pire/pire/re_lexer.h
index 5591c16d34..b961390615 100644
--- a/contrib/libs/pire/pire/re_lexer.h
+++ b/contrib/libs/pire/pire/re_lexer.h
@@ -43,8 +43,8 @@ namespace Pire {
 namespace Consts {
 enum { Inf = -1 };
 
-static const wchar32 Control = 0xF000;
-static const wchar32 ControlMask = 0xFF00;
+static const wchar32 Control = 0xF0000000;
+static const wchar32 ControlMask = 0xFF000000;
 static const wchar32 End = Control | 0xFF;
 };
 
diff --git a/contrib/libs/pire/ut/read_unicode_ut.cpp b/contrib/libs/pire/ut/read_unicode_ut.cpp
index f0433401c7..1756909687 100644
--- a/contrib/libs/pire/ut/read_unicode_ut.cpp
+++ b/contrib/libs/pire/ut/read_unicode_ut.cpp
@@ -295,4 +295,13 @@ Y_UNIT_TEST_SUITE(ReadUnicodeTest) {
 		}
 	}
 
+	Y_UNIT_TEST(AnyUnicodeCodepointIsAllowed)
+	{
+		REGEXP("[\\x{0}-\\x{77}\\x{79}-\\x{10ffff}]") {
+			ACCEPTS("w");
+			DENIES ("x");
+			ACCEPTS("y");
+		}
+	}
+
 }