aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: galtsev <galtsev@yandex-team.com> 2022-09-26 23:57:03 +0300
committer: galtsev <galtsev@yandex-team.com> 2022-09-26 23:57:03 +0300
commit: 22378bbd93b81795dcd66d084d843b0276190a76 (patch)
tree: b89f157fb8d595dba7a52e896bad2510f36019a8
parent: c97ae34b5d36b0e1bd72bb769c23e6a83e69703a (diff)
download: ydb-22378bbd93b81795dcd66d084d843b0276190a76.tar.gz
Move the lexer's control values (Consts::Control, Consts::ControlMask) out of the Unicode code point range so that PIRE's lexer can accept any Unicode code point.
Issue:
-rw-r--r-- contrib/libs/pire/pire/re_lexer.h 4
-rw-r--r-- contrib/libs/pire/ut/read_unicode_ut.cpp 9
2 files changed, 11 insertions, 2 deletions
diff --git a/contrib/libs/pire/pire/re_lexer.h b/contrib/libs/pire/pire/re_lexer.h
index 5591c16d34..b961390615 100644
--- a/contrib/libs/pire/pire/re_lexer.h
+++ b/contrib/libs/pire/pire/re_lexer.h
@@ -43,8 +43,8 @@ namespace Pire {
namespace Consts {
enum { Inf = -1 };
-static const wchar32 Control = 0xF000;
-static const wchar32 ControlMask = 0xFF00;
+static const wchar32 Control = 0xF0000000;
+static const wchar32 ControlMask = 0xFF000000;
static const wchar32 End = Control | 0xFF;
};
diff --git a/contrib/libs/pire/ut/read_unicode_ut.cpp b/contrib/libs/pire/ut/read_unicode_ut.cpp
index f0433401c7..1756909687 100644
--- a/contrib/libs/pire/ut/read_unicode_ut.cpp
+++ b/contrib/libs/pire/ut/read_unicode_ut.cpp
@@ -295,4 +295,13 @@ Y_UNIT_TEST_SUITE(ReadUnicodeTest) {
}
}
+ Y_UNIT_TEST(AnyUnicodeCodepointIsAllowed)
+ {
+ REGEXP("[\\x{0}-\\x{77}\\x{79}-\\x{10ffff}]") {
+ ACCEPTS("w");
+ DENIES ("x");
+ ACCEPTS("y");
+ }
+ }
+
}