aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: swarmer <swarmer@yandex-team.com> 2024-09-30 22:24:38 +0300
committer: swarmer <swarmer@yandex-team.com> 2024-09-30 22:35:54 +0300
commit: 803c95f77d7e098750be07c125e78f892ec7c169 (patch)
tree: d69fd2dbf013c998844350fc99b20929ab0ac9f2
parent: a6718e3c426bdd6f17af1f4f68f5a6a9b13f47be (diff)
download: ydb-803c95f77d7e098750be07c125e78f892ec7c169.tar.gz
enum_parser: support digit separators in numeric literals
commit_hash:9791d25e9ea02f73329f9755f7c70f335c612121
-rw-r--r-- library/cpp/cppparser/parser.cpp 34
-rw-r--r-- tools/enum_parser/parse_enum/parse_enum.cpp 14
-rw-r--r-- tools/enum_parser/parse_enum/parse_enum_ut.cpp 57
-rw-r--r-- tools/enum_parser/parse_enum/ut/digit_separator.h 19
-rw-r--r-- tools/enum_parser/parse_enum/ut/ya.make 1
5 files changed, 125 insertions, 0 deletions
diff --git a/library/cpp/cppparser/parser.cpp b/library/cpp/cppparser/parser.cpp
index 3bd968b459..70fb6a8735 100644
--- a/library/cpp/cppparser/parser.cpp
+++ b/library/cpp/cppparser/parser.cpp
@@ -1,4 +1,5 @@
#include <util/generic/hash.h>
+#include <util/string/ascii.h>
#include <util/string/cast.h>
#include <util/generic/hash_set.h>
#include <util/generic/yexception.h>
@@ -127,6 +128,10 @@ private:
break;
case '\'':
+ if (QuoteCharIsADigitSeparator()) {
+ Text_.Data += ch;
+ break;
+ }
Action(ch);
State_ = Character;
@@ -356,6 +361,35 @@ private:
}
}
+    // Tells whether the quote char being processed is a digit separator
+    // of a numeric literal (ex. 73'709'550'592) rather than the opening
+    // quote of a character literal (ex. 'a', u8'$', case'a', sizeof'a').
+    // Text_.Data is the text collected before the quote.
+    bool QuoteCharIsADigitSeparator() const {
+        const TStringBuf data = Text_.Data;
+        // a separator always directly follows a digit (maybe a hex one)
+        if (data.empty() || !IsAsciiHex(data.back())) {
+            return false;
+        }
+        // Walk back to the start of the token the quote is glued to.
+        // Earlier separators and dots belong to the same numeric literal.
+        const auto isTokenChar = [](char c) {
+            return IsAsciiAlnum(c) || c == '_' || c == '$' || c == '\'' || c == '.';
+        };
+        size_t start = data.size();
+        while (start > 0 && isTokenChar(data[start - 1])) {
+            --start;
+        }
+        // A numeric literal starts with a digit, or with a dot followed by
+        // a digit (ex. `.5'0`). Keywords, names and char literal prefixes
+        // (`u8`, `u`, `U`, `L`) do not, so a quote after them opens a char
+        // literal. `data.back()` is not a dot, so `start + 1` is in range.
+        if (data[start] == '.') {
+            ++start;
+        }
+        return IsAsciiDigit(data[start]);
+    }
+
inline void Action(char ch) {
Action();
Text_.Data += ch;
diff --git a/tools/enum_parser/parse_enum/parse_enum.cpp b/tools/enum_parser/parse_enum/parse_enum.cpp
index 07712411ae..53102781ae 100644
--- a/tools/enum_parser/parse_enum/parse_enum.cpp
+++ b/tools/enum_parser/parse_enum/parse_enum.cpp
@@ -146,6 +146,20 @@ public:
InEnumState = AfterCppName;
}
+    void DoKeyword(const TText& text) override {
+        // keyword text goes to AppendValue only in the InValue / InValueCall states; otherwise it is dropped
+        if (InEnumState == InValue || InEnumState == InValueCall) {
+            AppendValue(text.Data);
+        }
+    }
+
+    void DoCharacter(const TText& text) override {
+        // a character literal goes to AppendValue only in the InValue / InValueCall states; otherwise it is dropped
+        if (InEnumState == InValue || InEnumState == InValueCall) {
+            AppendValue(text.Data);
+        }
+    }
+
void DoMultiLineComment(const TText& text) override {
Y_ENSURE(text.Data.size() >= 4, "Invalid multiline comment " << text.Data.Quote() << ". ");
TString commentText = text.Data.substr(2, text.Data.size() - 4);
diff --git a/tools/enum_parser/parse_enum/parse_enum_ut.cpp b/tools/enum_parser/parse_enum/parse_enum_ut.cpp
index 21ed6a2fc4..e979f5119a 100644
--- a/tools/enum_parser/parse_enum/parse_enum_ut.cpp
+++ b/tools/enum_parser/parse_enum/parse_enum_ut.cpp
@@ -3,10 +3,30 @@
#include <tools/enum_parser/parse_enum/parse_enum.h>
+#include <util/generic/array_ref.h>
+#include <util/generic/maybe.h>
+
typedef TEnumParser::TEnum TEnum;
typedef TEnumParser::TEnums TEnums;
typedef TEnumParser::TItems TItems;
+namespace {
+    using TNameValuePair = std::pair<TStringBuf, TMaybe<TStringBuf>>;
+    // Asserts that the items of `e` match `ref` pairwise (name, optional value), then that the counts are equal.
+    void CompareNameValueItems(TConstArrayRef<TNameValuePair> ref, const TEnum& e) {
+        for (size_t idx = 0, common = Min(ref.size(), e.Items.size()); idx < common; ++idx) {
+            const auto& [expectedName, expectedValue] = ref[idx];
+            const auto& item = e.Items[idx];
+            UNIT_ASSERT_VALUES_EQUAL_C(item.CppName, expectedName, e.CppName);
+            UNIT_ASSERT_EQUAL_C(item.Value.Defined(), expectedValue.Defined(), e.CppName);
+            if (expectedValue.Defined() && item.Value.Defined()) {
+                UNIT_ASSERT_VALUES_EQUAL_C(*item.Value, *expectedValue, e.CppName);
+            }
+        }
+        UNIT_ASSERT_VALUES_EQUAL_C(e.Items.size(), ref.size(), e.CppName);
+    }
+}
+
Y_UNIT_TEST_SUITE(TEnumParserTest) {
Y_UNIT_TEST(MainTest) {
@@ -312,4 +332,41 @@ Y_UNIT_TEST_SUITE(TEnumParserTest) {
UNIT_ASSERT(CurrentExceptionMessage().Contains("https://clubs.at.yandex-team.ru/stackoverflow/2603"));
}
}
+
+    Y_UNIT_TEST(DigitSeparatorTest) { // parses the "/digit_separator" resource and checks item values verbatim
+        TString source = NResource::Find("/digit_separator");
+        TMemoryInput stream(source.data(), source.size());
+        TEnumParser parser(stream);
+        const TEnums& parsed = parser.Enums;
+        UNIT_ASSERT_VALUES_EQUAL(parsed.size(), 2u);
+        { // first enum: a prefixed char literal next to integer and floating literals with separators
+            const TEnum& literals = parsed[0];
+            UNIT_ASSERT_VALUES_EQUAL(literals.CppName, "ELiterals");
+            static constexpr TNameValuePair expected[]{
+                {"Char", "sizeof(u8'.')"},
+                {"Int", "123'456'789"},
+                {"Float1", "int(456'789.123'456)"},
+                {"Float2", "int(1'2e0'1)"},
+                {"Float3", "int(0x1'2p4)"},
+            };
+            CompareNameValueItems(expected, literals);
+            UNIT_ASSERT_VALUES_EQUAL(literals.Scope.size(), 0u);
+        }
+        { // second enum: plain integers with an increasing number of separators
+            const TEnum& precision = parsed[1];
+            UNIT_ASSERT_VALUES_EQUAL(precision.Scope.size(), 0u);
+            UNIT_ASSERT_VALUES_EQUAL(precision.CppName, "ETimePrecision");
+            static constexpr TNameValuePair expected[]{
+                {"MicroSeconds", "1"},
+                {"MilliSeconds", "1'000"},
+                {"Seconds", "1'000'000"},
+                {"Minutes", "60'000'000"},
+                {"Hours", "3'600'000'000"},
+                {"Days", "86'400'000'000"},
+                {"Weeks", "604'800'000'000"},
+            };
+            CompareNameValueItems(expected, precision);
+            UNIT_ASSERT_VALUES_EQUAL(precision.Scope.size(), 0u);
+        }
+    }
}
diff --git a/tools/enum_parser/parse_enum/ut/digit_separator.h b/tools/enum_parser/parse_enum/ut/digit_separator.h
new file mode 100644
index 0000000000..c7c8f526ac
--- /dev/null
+++ b/tools/enum_parser/parse_enum/ut/digit_separator.h
@@ -0,0 +1,19 @@
+#pragma once
+
+enum class ELiterals {
+ Char = sizeof(u8'.'),
+ Int = 123'456'789,
+ Float1 = int(456'789.123'456),
+ Float2 = int(1'2e0'1),
+ Float3 = int(0x1'2p4),
+};
+
+enum class ETimePrecision : unsigned long long {
+ MicroSeconds = 1 /* "us" */,
+ MilliSeconds = 1'000 /* "ms" */,
+ Seconds = 1'000'000 /* "s" */,
+ Minutes = 60'000'000 /* "m" */,
+ Hours = 3'600'000'000 /* "h" */,
+ Days = 86'400'000'000 /* "d" */,
+ Weeks = 604'800'000'000 /* "w" */,
+};
diff --git a/tools/enum_parser/parse_enum/ut/ya.make b/tools/enum_parser/parse_enum/ut/ya.make
index eee686134a..7cd8d018ec 100644
--- a/tools/enum_parser/parse_enum/ut/ya.make
+++ b/tools/enum_parser/parse_enum/ut/ya.make
@@ -8,6 +8,7 @@ PEERDIR(
SRCDIR(tools/enum_parser/parse_enum)
RESOURCE(
+ digit_separator.h /digit_separator
enums.h /enums
badcode.h /badcode
unbalanced.h /unbalanced