diff options
author | swarmer <swarmer@yandex-team.com> | 2024-09-30 22:24:38 +0300 |
---|---|---|
committer | swarmer <swarmer@yandex-team.com> | 2024-09-30 22:35:54 +0300 |
commit | 803c95f77d7e098750be07c125e78f892ec7c169 (patch) | |
tree | d69fd2dbf013c998844350fc99b20929ab0ac9f2 | |
parent | a6718e3c426bdd6f17af1f4f68f5a6a9b13f47be (diff) | |
download | ydb-803c95f77d7e098750be07c125e78f892ec7c169.tar.gz |
enum_parser: support digit separators in numeric literals
commit_hash:9791d25e9ea02f73329f9755f7c70f335c612121
-rw-r--r-- | library/cpp/cppparser/parser.cpp | 34 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/parse_enum.cpp | 14 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/parse_enum_ut.cpp | 57 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/digit_separator.h | 19 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/ya.make | 1 |
5 files changed, 125 insertions, 0 deletions
diff --git a/library/cpp/cppparser/parser.cpp b/library/cpp/cppparser/parser.cpp
index 3bd968b459..70fb6a8735 100644
--- a/library/cpp/cppparser/parser.cpp
+++ b/library/cpp/cppparser/parser.cpp
@@ -1,4 +1,5 @@
 #include <util/generic/hash.h>
+#include <util/string/ascii.h>
 #include <util/string/cast.h>
 #include <util/generic/hash_set.h>
 #include <util/generic/yexception.h>
@@ -127,6 +128,10 @@ private:
                 break;
 
             case '\'':
+                if (QuoteCharIsADigitSeparator()) {
+                    Text_.Data += ch;
+                    break;
+                }
                 Action(ch);
                 State_ = Character;
 
@@ -356,6 +361,35 @@ private:
         }
     }
 
+    // digit separator in integral literal (ex. 73'709'550'592)
+    bool QuoteCharIsADigitSeparator() const {
+        const TStringBuf data = Text_.Data;
+        if (data.empty()) {
+            return false;
+        }
+        if (!IsAsciiHex(data.back())) {
+            return false;
+        }
+        // check for char literal prefix (ex. `u8'$'`)
+        static constexpr TStringBuf literalPrefixes[] {
+            "u8",
+            "u",
+            "U",
+            "L",
+        };
+        for (const TStringBuf& literalPrefix : literalPrefixes) {
+            if (TStringBuf prev; data.BeforeSuffix(literalPrefix, prev)) {
+                if (!prev.empty() && (IsAsciiAlnum(prev.back()) || prev.back() == '_' || prev.back() == '$')) {
+                    // some macro name ends with an `u8` sequence
+                    continue;
+                }
+                // it is a prefixed character literal
+                return false;
+            }
+        }
+        return true;
+    }
+
     inline void Action(char ch) {
         Action();
         Text_.Data += ch;
diff --git a/tools/enum_parser/parse_enum/parse_enum.cpp b/tools/enum_parser/parse_enum/parse_enum.cpp
index 07712411ae..53102781ae 100644
--- a/tools/enum_parser/parse_enum/parse_enum.cpp
+++ b/tools/enum_parser/parse_enum/parse_enum.cpp
@@ -146,6 +146,20 @@ public:
         InEnumState = AfterCppName;
     }
 
+    void DoKeyword(const TText& text) override {
+        if (InValue == InEnumState || InValueCall == InEnumState) {
+            AppendValue(text.Data);
+            return;
+        }
+    }
+
+    void DoCharacter(const TText& text) override {
+        if (InValue == InEnumState || InValueCall == InEnumState) {
+            AppendValue(text.Data);
+            return;
+        }
+    }
+
     void DoMultiLineComment(const TText& text) override {
         Y_ENSURE(text.Data.size() >= 4, "Invalid multiline comment " << text.Data.Quote() << ". ");
         TString commentText = text.Data.substr(2, text.Data.size() - 4);
diff --git a/tools/enum_parser/parse_enum/parse_enum_ut.cpp b/tools/enum_parser/parse_enum/parse_enum_ut.cpp
index 21ed6a2fc4..e979f5119a 100644
--- a/tools/enum_parser/parse_enum/parse_enum_ut.cpp
+++ b/tools/enum_parser/parse_enum/parse_enum_ut.cpp
@@ -3,10 +3,30 @@
 
 #include <tools/enum_parser/parse_enum/parse_enum.h>
 
+#include <util/generic/array_ref.h>
+#include <util/generic/maybe.h>
+
 typedef TEnumParser::TEnum TEnum;
 typedef TEnumParser::TEnums TEnums;
 typedef TEnumParser::TItems TItems;
 
+namespace {
+    using TNameValuePair = std::pair<TStringBuf, TMaybe<TStringBuf>>;
+
+    void CompareNameValueItems(TConstArrayRef<TNameValuePair> ref, const TEnum& e) {
+        const TItems& it = e.Items;
+        for (size_t i = 0; i < Min(ref.size(), it.size()); ++i) {
+            const auto& [refCppName, refValue] = ref[i];
+            UNIT_ASSERT_VALUES_EQUAL_C(it[i].CppName, refCppName, e.CppName);
+            UNIT_ASSERT_EQUAL_C(it[i].Value.Defined(), refValue.Defined(), e.CppName);
+            if (refValue.Defined() && it[i].Value.Defined()) {
+                UNIT_ASSERT_VALUES_EQUAL_C(*it[i].Value, *refValue, e.CppName);
+            }
+        }
+        UNIT_ASSERT_VALUES_EQUAL_C(it.size(), ref.size(), e.CppName);
+    }
+}
+
 Y_UNIT_TEST_SUITE(TEnumParserTest) {
 
     Y_UNIT_TEST(MainTest) {
@@ -312,4 +332,41 @@ Y_UNIT_TEST_SUITE(TEnumParserTest) {
             UNIT_ASSERT(CurrentExceptionMessage().Contains("https://clubs.at.yandex-team.ru/stackoverflow/2603"));
         }
     }
+
+    Y_UNIT_TEST(DigitSeparatorTest) {
+        TString text = NResource::Find("/digit_separator");
+        TMemoryInput input(text.data(), text.size());
+        TEnumParser parser(input);
+        const TEnums& enums = parser.Enums;
+        UNIT_ASSERT_VALUES_EQUAL(enums.size(), 2u);
+        {
+            const TEnum& e = enums[0];
+            UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ELiterals");
+            static constexpr TNameValuePair ref[]{
+                {"Char", "sizeof(u8'.')"},
+                {"Int", "123'456'789"},
+                {"Float1", "int(456'789.123'456)"},
+                {"Float2", "int(1'2e0'1)"},
+                {"Float3", "int(0x1'2p4)"},
+            };
+            CompareNameValueItems(ref, e);
+            UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u);
+        }
+        {
+            const TEnum& e = enums[1];
+            UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u);
+            UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ETimePrecision");
+            static constexpr TNameValuePair ref[]{
+                {"MicroSeconds", "1"},
+                {"MilliSeconds", "1'000"},
+                {"Seconds", "1'000'000"},
+                {"Minutes", "60'000'000"},
+                {"Hours", "3'600'000'000"},
+                {"Days", "86'400'000'000"},
+                {"Weeks", "604'800'000'000"},
+            };
+            CompareNameValueItems(ref, e);
+            UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u);
+        }
+    }
 }
diff --git a/tools/enum_parser/parse_enum/ut/digit_separator.h b/tools/enum_parser/parse_enum/ut/digit_separator.h
new file mode 100644
index 0000000000..c7c8f526ac
--- /dev/null
+++ b/tools/enum_parser/parse_enum/ut/digit_separator.h
@@ -0,0 +1,19 @@
+#pragma once
+
+enum class ELiterals {
+    Char = sizeof(u8'.'),
+    Int = 123'456'789,
+    Float1 = int(456'789.123'456),
+    Float2 = int(1'2e0'1),
+    Float3 = int(0x1'2p4),
+};
+
+enum class ETimePrecision : unsigned long long {
+    MicroSeconds = 1 /* "us" */,
+    MilliSeconds = 1'000 /* "ms" */,
+    Seconds = 1'000'000 /* "s" */,
+    Minutes = 60'000'000 /* "m" */,
+    Hours = 3'600'000'000 /* "h" */,
+    Days = 86'400'000'000 /* "d" */,
+    Weeks = 604'800'000'000 /* "w" */,
+};
diff --git a/tools/enum_parser/parse_enum/ut/ya.make b/tools/enum_parser/parse_enum/ut/ya.make
index eee686134a..7cd8d018ec 100644
--- a/tools/enum_parser/parse_enum/ut/ya.make
+++ b/tools/enum_parser/parse_enum/ut/ya.make
@@ -8,6 +8,7 @@ PEERDIR(
 SRCDIR(tools/enum_parser/parse_enum)
 
 RESOURCE(
+    digit_separator.h /digit_separator
     enums.h /enums
     badcode.h /badcode
     unbalanced.h /unbalanced