diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /tools/enum_parser/parse_enum | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'tools/enum_parser/parse_enum')
-rw-r--r-- | tools/enum_parser/parse_enum/parse_enum.cpp | 422 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/parse_enum.h | 78 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/parse_enum_ut.cpp | 315 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/alias_before_name.h | 7 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/badcode.h | 10 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/enums.cpp | 195 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/enums.h | 195 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/enums_with_header.h | 8 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/including_header.h | 9 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/stringlist.cpp | 1 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/unbalanced.h | 4 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ut/ya.make | 33 | ||||
-rw-r--r-- | tools/enum_parser/parse_enum/ya.make | 16 |
13 files changed, 1293 insertions, 0 deletions
diff --git a/tools/enum_parser/parse_enum/parse_enum.cpp b/tools/enum_parser/parse_enum/parse_enum.cpp new file mode 100644 index 0000000000..3db0d7a4d9 --- /dev/null +++ b/tools/enum_parser/parse_enum/parse_enum.cpp @@ -0,0 +1,422 @@ +#include "parse_enum.h" + +#include <library/cpp/cppparser/parser.h> + +#include <util/stream/file.h> +#include <util/stream/output.h> +#include <util/stream/input.h> +#include <util/stream/mem.h> + +#include <util/charset/wide.h> +#include <util/string/strip.h> +#include <util/string/cast.h> +#include <util/generic/map.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/ptr.h> +#include <util/generic/yexception.h> + +/** + * Parse C-style strings inside multiline comments + **/ +class TValuesContext: public TCppFullSax { +public: + void DoString(const TText& text) override { + Values.push_back(text.Data); + } + + ~TValuesContext() override { + } + + TVector<TString> Values; +}; + +static TVector<TString> ParseEnumValues(const TString& strValues) { + TVector<TString> result; + + TValuesContext ctx; + TCppSaxParser parser(&ctx); + TMemoryInput in(strValues.data(), strValues.size()); + TransferData(static_cast<IInputStream*>(&in), &parser); + parser.Finish(); + for (const auto& value : ctx.Values) { + Y_ENSURE(value.size() >= 2, "Invalid C-style string. "); + TString dequoted = value.substr(1, value.size() - 2); + // TODO: support C-unescaping + result.push_back(dequoted); + } + return result; +} + +/** + * Parse C++ fragment with one enum + **/ +class TEnumContext: public TCppFullSax { +public: + typedef TEnumParser::TItem TItem; + typedef TEnumParser::TEnum TEnum; + + TEnumContext(TEnum& currentEnum) + : CurrentEnum(currentEnum) + { + } + + ~TEnumContext() override { + } + + void AddEnumItem() { + if (!CurrentItem.CppName) { + // uninitialized element should have no value too + Y_ASSERT(!CurrentItem.Value.Defined()); + return; + } + + // enum item C++ name should not be empty + Y_ASSERT(CurrentItem.CppName); + CurrentItem.NormalizeValue(); + CurrentEnum.Items.push_back(CurrentItem); + CurrentItem.Clear(); + InEnumState = Begin; + } + + template<class T> + void AppendValue(const T& text) { + // by pg@ advice, do not parse enum value + // leave it to C++ compiler to parse/interpret + + if (!CurrentItem.Value) + CurrentItem.Value = TString(); + + *CurrentItem.Value += text; + } + + void DoEnd() override { + AddEnumItem(); + } + + void DoWhiteSpace(const TText& text) override { + if (InValue == InEnumState || InValueCall == InEnumState) { + AppendValue(text.Data); + } + } + + void DoSyntax(const TText& text) override { + // For some reason, parser sometimes passes chunks like '{};' here, + // so we handle each symbol separately. + for (const char& sym : text.Data) { + if ('{' == sym && InValue != InEnumState && InValueCall != InEnumState) { + BodyDetected = true; + continue; + } else if ('=' == sym && InValueCall != InEnumState) { + InEnumState = InValue; + continue; + } else if (('(' == sym || '{' == sym) && (InValue == InEnumState || InValueCall == InEnumState)) { + // there may be constexpr function / constructor / macro call in value part, + // handle them appropriately + InEnumState = InValueCall; + ++BracesBalance; + AppendValue(sym); + continue; + } else if ((')' == sym || '}' == sym) && InValueCall == InEnumState) { + if (!--BracesBalance) { + InEnumState = InValue; + } + AppendValue(sym); + continue; + } else if ((',' == sym || '}' == sym) && InValueCall != InEnumState) { + AddEnumItem(); + continue; + } else if (InValue == InEnumState || InValueCall == InEnumState) { + AppendValue(sym); + } + } + } + + void DoName(const TText& text) override { + if (!BodyDetected) { + return; + } + + if (InValue == InEnumState || InValueCall == InEnumState) { + AppendValue(text.Data); + return; + } + + CurrentItem.CppName = text.Data; + InEnumState = AfterCppName; + } + + void DoMultiLineComment(const TText& text) override { + Y_ENSURE(text.Data.size() >= 4, "Invalid multiline comment " << text.Data.Quote() << ". "); + TString commentText = text.Data.substr(2, text.Data.size() - 4); + commentText = StripString(commentText); + CurrentItem.CommentText = commentText; + CurrentItem.Aliases = ParseEnumValues(commentText); + + if (CurrentItem.Aliases && !CurrentItem.CppName) { + // this means we process multiline comment when item name was not set yet. + ythrow yexception() << "Are you hit with https://clubs.at.yandex-team.ru/stackoverflow/2603 typo? "; + } + } + + bool BodyDetected = false; + enum EInEnumState { + Begin, + AfterCppName, + InValue, + InValueCall, + End, + }; + EInEnumState InEnumState = Begin; + + TEnum& CurrentEnum; + TItem CurrentItem; + + size_t BracesBalance = 0; +}; + +/** + * Parse C++ file + **/ +class TCppContext: public TCppFullSax { +public: + typedef TEnumParser::TScope TScope; + typedef TEnumParser::TItem TItem; + typedef TEnumParser::TEnum TEnum; + typedef TEnumParser::TEnums TEnums; + + const TString NAMESPACE = "<namespace>"; + const TString CLASS = "<class>"; + const TString STRUCT = "<struct>"; + const TString ENUM = "<enum>"; + const TString BLOCK = "<block>"; + + TCppContext(const char* data, const TString& sourceFileName = TString()) + : Data(data) + , SourceFileName(sourceFileName) + { + } + + ~TCppContext() override { + } + + void DoSyntax(const TText& text) override { + // For some reason, parser sometimes passes chunks like '{};' here, + // so we handle each symbol separately. + const TString& syn = text.Data; + if (syn == "::" && InCompositeNamespace) { + LastScope += syn; + InCompositeNamespace = false; + ScopeDeclaration = true; + return; + } + for (size_t i = 0; i < syn.size(); ++i) { + if ('{' == syn[i]) { + OnEnterScope(text.Offset + i); + if (InEnum) { + CurrentEnum.BodyDetected = true; + } + } else if ('}' == syn[i]) { + OnLeaveScope(text.Offset + i); + } else if (';' == syn[i]) { + // Handle SEARCH-1392 + if (InEnum && !CurrentEnum.BodyDetected) { + CurrentEnum.ForwardDeclaration = true; + InEnum = false; + } + } + } + } + + void DoKeyword(const TText& text) override { + if (text.Data == "enum") { + Y_ENSURE(!InEnum, "Enums cannot be nested. "); + InEnum = true; + EnumPos = text.Offset; + CurrentEnum.Clear(); + CurrentEnum.Scope = Scope; + ScopeDeclaration = true; + NextScopeName = ENUM; + //PrintScope(); + } else if (text.Data == "class") { + if (InEnum) { + CurrentEnum.EnumClass = true; + return; + } + NextScopeName = CLASS; + ScopeDeclaration = true; + //PrintScope(); + } else if (text.Data == "struct") { + if (InEnum) { + CurrentEnum.EnumClass = true; + return; + } + NextScopeName = STRUCT; + ScopeDeclaration = true; + //PrintScope(); + } else if (text.Data == "namespace") { + NextScopeName = NAMESPACE; + LastScope.clear(); + ScopeDeclaration = true; + //PrintScope(); + } + } + + void DoName(const TText& text) override { + if (!ScopeDeclaration) { + return; + } + if (InEnum) { + CurrentEnum.CppName = text.Data; + } else { + if (NextScopeName == NAMESPACE) { + InCompositeNamespace = true; + LastScope += text.Data; + } else { + LastScope = text.Data; + } + } + ScopeDeclaration = false; + } + + void OnEnterScope(size_t /* offset */) { + if (ScopeDeclaration) { + // unnamed declaration or typedef + ScopeDeclaration = false; + } + InCompositeNamespace = false; + Scope.push_back(LastScope); + LastScope.clear(); + //PrintScope(); + } + + /// @param offset: terminating curly brace position + void OnLeaveScope(size_t offset) { + if (!Scope) { + size_t contextOffsetBegin = (offset >= 256) ? offset - 256 : 0; + TString codeContext = TString(Data + contextOffsetBegin, offset - contextOffsetBegin + 1); + ythrow yexception() << "C++ source parse failed: unbalanced scope. Did you miss a closing '}' bracket? " + "Context: enum " << CurrentEnum.CppName.Quote() << + " in scope " << TEnumParser::ScopeStr(CurrentEnum.Scope).Quote() << ". Code context:\n... " << + codeContext << " ..."; + } + Scope.pop_back(); + + if (InEnum) { + Y_ASSERT(offset > EnumPos); + InEnum = false; + try { + ParseEnum(Data + EnumPos, offset - EnumPos + 1); + } catch (...) { + TString ofFile; + if (SourceFileName) { + ofFile += " of file "; + ofFile += SourceFileName.Quote(); + } + ythrow yexception() << "Failed to parse enum " << CurrentEnum.CppName << + " in scope " << TEnumParser::ScopeStr(CurrentEnum.Scope) << ofFile << + "\n<C++ parser error message>: " << CurrentExceptionMessage(); + } + } + //PrintScope(); + } + + void ParseEnum(const char* data, size_t length) { + TEnumContext enumContext(CurrentEnum); + TMemoryInput in(data, length); + TCppSaxParser parser(&enumContext); + TransferData(&in, &parser); + parser.Finish(); + //PrintEnum(CurrentEnum); + Enums.push_back(CurrentEnum); + } + + // Some debug stuff goes here + static void PrintScope(const TScope& scope) { + Cerr << "Current scope: " << TEnumParser::ScopeStr(scope) << Endl; + } + + void PrintScope() { + PrintScope(Scope); + } + + void PrintEnum(const TEnum& en) { + Cerr << "Enum within scope " << TEnumParser::ScopeStr(en.Scope).Quote() << Endl; + for (const auto& item : en.Items) { + Cerr << " " << item.CppName; + if (item.Value) + Cerr << " = " << *item.Value; + Cerr << Endl; + for (const auto& value : item.Aliases) { + Cerr << " " << value << Endl; + } + } + } + + void PrintEnums() { + for (const auto& en : Enums) + PrintEnum(en); + } + +public: + TScope Scope; + TEnums Enums; +private: + const char* const Data; + TString SourceFileName; + + bool InEnum = false; + bool ScopeDeclaration = false; + bool InCompositeNamespace = false; + TString NextScopeName = BLOCK; + TString LastScope; + size_t EnumPos = 0; + TEnum CurrentEnum; +}; + + +TEnumParser::TEnumParser(const TString& fileName) { + THolder<IInputStream> hIn; + IInputStream* in = nullptr; + if (fileName != "-") { + SourceFileName = fileName; + hIn.Reset(new TFileInput(fileName)); + in = hIn.Get(); + } else { + in = &Cin; + } + TString contents = in->ReadAll(); + Parse(contents.data(), contents.size()); +} + +TEnumParser::TEnumParser(const char* data, size_t length) { + Parse(data, length); +} + +TEnumParser::TEnumParser(IInputStream& in) { + TString contents = in.ReadAll(); + Parse(contents.data(), contents.size()); +} + +void TEnumParser::Parse(const char* data, size_t length) { + const TStringBuf span(data, length); + const bool hasPragmaOnce = span.Contains("#pragma once"); + const bool isProtobufHeader = span.Contains("// Generated by the protocol buffer compiler"); + const bool isFlatbuffersHeader = span.Contains("// automatically generated by the FlatBuffers compiler"); + Y_ENSURE( + hasPragmaOnce || isProtobufHeader || isFlatbuffersHeader, + "Serialization functions can be generated only for enums in header files, see SEARCH-975. " + ); + TCppContext cppContext(data, SourceFileName); + TMemoryInput in(data, length); + TCppSaxParser parser(&cppContext); + TransferData(&in, &parser); + parser.Finish(); + //cppContext.PrintEnums(); + // obtain result + Enums = cppContext.Enums; + if (cppContext.Scope) { + cppContext.PrintScope(); + ythrow yexception() << "Unbalanced scope, something is wrong with enum parser. "; + } +} diff --git a/tools/enum_parser/parse_enum/parse_enum.h b/tools/enum_parser/parse_enum/parse_enum.h new file mode 100644 index 0000000000..ef8b512ae4 --- /dev/null +++ b/tools/enum_parser/parse_enum/parse_enum.h @@ -0,0 +1,78 @@ +#pragma once + +#include <util/stream/output.h> +#include <util/stream/input.h> +#include <util/stream/mem.h> +#include <util/string/strip.h> +#include <util/generic/maybe.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> + +class TEnumParser { +public: + + struct TItem { + TMaybe<TString> Value; + TString CppName; + TVector<TString> Aliases; + TString CommentText; + + void Clear() { + *this = TItem(); + } + + void NormalizeValue() { + if (!Value) + return; + StripInPlace(*Value); + } + + }; + + // vector is to preserve declaration order + typedef TVector<TItem> TItems; + + typedef TVector<TString> TScope; + + struct TEnum { + TItems Items; + TString CppName; + TScope Scope; + // enum or enum class + bool EnumClass = false; + bool BodyDetected = false; + bool ForwardDeclaration = false; + + void Clear() { + *this = TEnum(); + } + }; + + typedef TVector<TEnum> TEnums; + + /// Parse results stored here + TEnums Enums; + + /// Parse enums from file containing C++ code + TEnumParser(const TString& fileName); + + /// Parse enums from memory buffer containing C++ code + TEnumParser(const char* data, size_t length); + + /// Parse enums from input stream + TEnumParser(IInputStream& in); + + static TString ScopeStr(const TScope& scope) { + TString result; + for (const TString& name : scope) { + result += name; + result += "::"; + } + return result; + } + +private: + void Parse(const char* data, size_t length); +protected: + TString SourceFileName; +}; diff --git a/tools/enum_parser/parse_enum/parse_enum_ut.cpp b/tools/enum_parser/parse_enum/parse_enum_ut.cpp new file mode 100644 index 0000000000..21ed6a2fc4 --- /dev/null +++ b/tools/enum_parser/parse_enum/parse_enum_ut.cpp @@ -0,0 +1,315 @@ +#include <library/cpp/resource/resource.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <tools/enum_parser/parse_enum/parse_enum.h> + +typedef TEnumParser::TEnum TEnum; +typedef TEnumParser::TEnums TEnums; +typedef TEnumParser::TItems TItems; + +Y_UNIT_TEST_SUITE(TEnumParserTest) { + + Y_UNIT_TEST(MainTest) { + TString text = NResource::Find("/enums"); + TMemoryInput input(text.data(), text.size()); + TEnumParser parser(input); + const TEnums& enums = parser.Enums; + + UNIT_ASSERT_VALUES_EQUAL(enums.size(), 16u); + + // check ESimple + { + const TEnum& e = enums[0]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ESimple"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Http"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 0u); + UNIT_ASSERT(!it[0].Value.Defined()); + + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "Https"); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 0u); + UNIT_ASSERT(!it[1].Value.Defined()); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "ItemCount"); + UNIT_ASSERT_VALUES_EQUAL(it[2].Aliases.size(), 0u); + UNIT_ASSERT(!it[2].Value.Defined()); + } + + // ESimpleWithComma + { + const TEnum& e = enums[1]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ESimpleWithComma"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 4u); + + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Http"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "3"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 0u); + + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "Http2"); + UNIT_ASSERT(it[1].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "Http"); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "Https"); + UNIT_ASSERT_VALUES_EQUAL(it[2].Aliases.size(), 0u); + UNIT_ASSERT(!it[2].Value.Defined()); + + UNIT_ASSERT_VALUES_EQUAL(it[3].CppName, "ItemCount"); + UNIT_ASSERT_VALUES_EQUAL(it[3].Aliases.size(), 0u); + UNIT_ASSERT(!it[3].Value.Defined()); + } + + // check ECustomAliases + { + const TEnum& e = enums[2]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ECustomAliases"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "CAHttp"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "3"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases[0], "http"); + + UNIT_ASSERT(!it[1].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "CAHttps"); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases[0], "https"); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "CAItemCount"); + UNIT_ASSERT_VALUES_EQUAL(it[2].Aliases.size(), 0u); + } + + // check EMultipleAliases + { + const TEnum& e = enums[3]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EMultipleAliases"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "MAHttp"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "9"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases[0], "http://"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases[1], "secondary"); + // yes, quoted values are NOT decoded, it is a known (minor) bug + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases[2], "old\\nvalue"); + + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "MAHttps"); + UNIT_ASSERT(it[1].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "1"); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases[0], "https://"); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "MAItemCount"); + UNIT_ASSERT(!it[2].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(it[2].Aliases.size(), 0u); + } + + // check NEnumNamespace::EInNamespace + { + const TEnum& e = enums[4]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[0], "NEnumNamespace"); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EInNamespace"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Http"); + UNIT_ASSERT(it[0].Value.Defined()); + } + + // check NEnumNamespace::TEnumClass::EInClass + { + const TEnum& e = enums[5]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[0], "NEnumNamespace"); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[1], "TEnumClass"); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EInClass"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Http"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "9"); + + UNIT_ASSERT(it[1].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "NEnumNamespace::Https"); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "Https3"); + UNIT_ASSERT(it[2].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[2].Value, "1 + 2"); + } + + // check unnamed enum (no code should be generated for it) + { + const TEnum& e = enums[6]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, ""); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + } + + // TEXT_WEIGHT + { + const TEnum& e = enums[7]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "TEXT_WEIGHT"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 5u); + + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "WEIGHT_ZERO"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "-1"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 0u); + + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "WEIGHT_LOW"); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 0u); + UNIT_ASSERT(!it[1].Value.Defined()); + + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "WEIGHT_NORMAL"); + UNIT_ASSERT_VALUES_EQUAL(it[2].Aliases.size(), 0u); + UNIT_ASSERT(!it[2].Value.Defined()); + } + + // EDuplicateKeys + { + const TEnum& e = enums[8]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EDuplicateKeys"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 5u); + + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Key0"); + UNIT_ASSERT(it[0].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "0"); + UNIT_ASSERT_VALUES_EQUAL(it[0].Aliases.size(), 0u); + + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "Key0Second"); + UNIT_ASSERT(it[1].Value.Defined()); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "Key0"); + UNIT_ASSERT_VALUES_EQUAL(it[1].Aliases.size(), 0u); + } + + // EEmpty + { + const TEnum& e = enums[10]; + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 0u); + } + + // NComposite::NInner::EInCompositeNamespaceSimple + { + const TEnum& e = enums[11]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[0], "NComposite::NInner"); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EInCompositeNamespaceSimple"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "one"); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "2") ; + } + + // NOuterSimple::NComposite::NMiddle::NInner::NInnerSimple::TEnumClass::EVeryDeep + { + const TEnum& e = enums[12]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 4u); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[0], "NOuterSimple"); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[1], "NComposite::NMiddle::NInner"); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[2], "NInnerSimple"); + UNIT_ASSERT_VALUES_EQUAL(e.Scope[3], "TEnumClass"); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "EVeryDeep"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Key0"); + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "Key1"); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "1"); + } + + // ENonLiteralValues + { + const TEnum& e = enums[13]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ENonLiteralValues"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 5u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "one"); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "MACRO(1, 2)"); + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "two"); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "2"); + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "three"); + UNIT_ASSERT_VALUES_EQUAL(*it[2].Value, "func(3)"); + UNIT_ASSERT_VALUES_EQUAL(it[3].CppName, "four"); + UNIT_ASSERT_VALUES_EQUAL(it[3].Value.Defined(), false); + UNIT_ASSERT_VALUES_EQUAL(it[4].CppName, "five"); + UNIT_ASSERT_VALUES_EQUAL(it[4].Value, "MACRO(MACRO(1, 2), 2)"); + } + + // NotifyingStatus + { + const TEnum& e = enums[15]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "NotifyingStatus"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 4u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "NEW"); + UNIT_ASSERT_VALUES_EQUAL(*it[0].Value, "0"); + UNIT_ASSERT_VALUES_EQUAL(it[1].CppName, "FAILED_WILL_RETRY"); + UNIT_ASSERT_VALUES_EQUAL(*it[1].Value, "1"); + UNIT_ASSERT_VALUES_EQUAL(it[2].CppName, "FAILED_NO_MORE_TRIALS"); + UNIT_ASSERT_VALUES_EQUAL(*it[2].Value, "2"); + UNIT_ASSERT_VALUES_EQUAL(it[3].CppName, "SENT"); + UNIT_ASSERT_VALUES_EQUAL(*it[3].Value, "3"); + } + } + + Y_UNIT_TEST(BadCodeParseTest) { + TString text = NResource::Find("/badcode"); + TMemoryInput input(text.data(), text.size()); + TEnumParser parser(input); + const TEnums& enums = parser.Enums; + + UNIT_ASSERT_VALUES_EQUAL(enums.size(), 1u); + + // check <anonymous namespace>::ETest correct parsing + { + const TEnum& e = enums[0]; + UNIT_ASSERT_VALUES_EQUAL(e.Scope.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(e.CppName, "ETest"); + const TItems& it = e.Items; + UNIT_ASSERT_VALUES_EQUAL(it.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(it[0].CppName, "Http"); + UNIT_ASSERT(it[0].Value.Defined()); + } + + } + + Y_UNIT_TEST(UnbalancedCodeParseTest) { + // Thanks gotmanov@ for providing this example + TString text = NResource::Find("/unbalanced"); + TMemoryInput input(text.data(), text.size()); + try { + TEnumParser parser(input); + UNIT_ASSERT(false); + } catch(...) { + UNIT_ASSERT(CurrentExceptionMessage().Contains("unbalanced scope. Did you miss a closing")); + } + } + + Y_UNIT_TEST(AliasBeforeNameTest) { + TString text = NResource::Find("/alias_before_name"); + TMemoryInput input(text.data(), text.size()); + try { + TEnumParser parser(input); + UNIT_ASSERT(false); + } catch(...) { + UNIT_ASSERT(CurrentExceptionMessage().Contains("https://clubs.at.yandex-team.ru/stackoverflow/2603")); + } + } +} diff --git a/tools/enum_parser/parse_enum/ut/alias_before_name.h b/tools/enum_parser/parse_enum/ut/alias_before_name.h new file mode 100644 index 0000000000..64015c1db6 --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/alias_before_name.h @@ -0,0 +1,7 @@ +#pragma once + +// https://clubs.at.yandex-team.ru/stackoverflow/2603 bad example +enum EStrange { + One, /* "one" */ + Two, /* "two" */ +}; diff --git a/tools/enum_parser/parse_enum/ut/badcode.h b/tools/enum_parser/parse_enum/ut/badcode.h new file mode 100644 index 0000000000..88448c8eae --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/badcode.h @@ -0,0 +1,10 @@ +#pragma once + +// Anonymous namespaces are meaningless, but should not break our parser +namespace { + enum ETest { + Http = 9 /* "http://" "secondary" "old\nvalue" */, + Https = 1 /* "https://" */, + ETestItemCount, + }; +} diff --git a/tools/enum_parser/parse_enum/ut/enums.cpp b/tools/enum_parser/parse_enum/ut/enums.cpp new file mode 100644 index 0000000000..a03045855e --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/enums.cpp @@ -0,0 +1,195 @@ +#include "enums.h" +#include "enums_with_header.h" +#include <tools/enum_parser/parse_enum/ut/enums_with_header.h_serialized.h> + +#include "including_header.h" + +// just to test that generated stuff works +#include <util/generic/serialized_enum.h> +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/ptr.h> +#include <util/generic/singleton.h> + + +void FunctionUsingEFwdEnum(EFwdEnum) { +} + +class TEnumSerializationInitializer { +public: + TEnumSerializationInitializer() { + UNIT_ASSERT_VALUES_EQUAL(ToString(EDestructionPriorityTest::first), "first"); + } + ~TEnumSerializationInitializer() { + UNIT_ASSERT_VALUES_EQUAL(ToString(EDestructionPriorityTest::second), "second"); + } +}; + +class TEnumSerializationInitializerHolder { +public: + TEnumSerializationInitializerHolder() { + } + + ~TEnumSerializationInitializerHolder() { + } + + void Init() { Ptr.Reset(new TEnumSerializationInitializer); } +private: + THolder<TEnumSerializationInitializer> Ptr; +}; + + +Y_UNIT_TEST_SUITE(TEnumGeneratorTest) { + + template<typename T> + void CheckToString(const T& value, const TString& strValue) { + UNIT_ASSERT_VALUES_EQUAL(ToString(value), strValue); + } + + Y_UNIT_TEST(ToStringTest) { + // ESimple + CheckToString(Http, "Http"); + CheckToString(Https, "Https"); + CheckToString(ItemCount, "ItemCount"); + + // ESimpleWithComma + CheckToString(ESimpleWithComma::Http, "Http"); + CheckToString(ESimpleWithComma::Https, "Https"); + CheckToString(ESimpleWithComma::Http2, "Http"); // Http2 is an alias for Http + CheckToString(ESimpleWithComma::ItemCount, "ItemCount"); + + // ECustomAliases + CheckToString(CAHttp, "http"); + CheckToString(CAHttps, "https"); + CheckToString(CAItemCount, "CAItemCount"); + + // EMultipleAliases + CheckToString(MAHttp, "http://"); + CheckToString(MAHttps, "https://"); + CheckToString(MAItemCount, "MAItemCount"); + + // EDuplicateKeys + CheckToString(Key0, "Key0"); + CheckToString(Key0Second, "Key0"); // obtain FIRST encountered value with such integer key + CheckToString(Key1, "Key1"); + CheckToString(Key2, "k2"); + CheckToString(Key3, "k2"); // we CANNOT obtain "k3" here (as Key3 == Key2) + } + + template<typename T> + void CheckFromString(const TString& strValue, const T& value) { + UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(FromString<T>(TStringBuf(strValue))), static_cast<int>(value)); + } + + template<typename T> + void CheckFromStringFail(const TString& strValue) { + UNIT_ASSERT_EXCEPTION(FromString<T>(TStringBuf(strValue)), yexception); + } + + template<typename T> + void CheckTryFromString(const TString& strValue, const T& value) { + T x; + UNIT_ASSERT_VALUES_EQUAL(TryFromString(TStringBuf(strValue), x), true); + UNIT_ASSERT_VALUES_EQUAL(x, value); + } + + template<typename T> + void CheckTryFromStringFail(const TString& strValue) { + T x = T(-666); + UNIT_ASSERT_VALUES_EQUAL(TryFromString(TStringBuf(strValue), x), false); + UNIT_ASSERT_VALUES_EQUAL(int(x), -666); + } + + Y_UNIT_TEST(TryFromStringTest) { + // ESimple + CheckFromString("Http", Http); + CheckFromString("Https", Https); + CheckFromString("ItemCount", ItemCount); + CheckFromStringFail<ESimple>("ItemC0unt"); + + CheckTryFromString("Http", Http); + CheckTryFromString("Https", Https); + CheckTryFromString("ItemCount", ItemCount); + CheckTryFromStringFail<ESimple>("ItemC0unt"); + + // ESimpleWithComma + CheckTryFromString("Http", ESimpleWithComma::Http); + CheckTryFromString("Https", ESimpleWithComma::Https); + CheckTryFromString("ItemCount", ESimpleWithComma::ItemCount); + CheckTryFromStringFail<ESimpleWithComma>(""); + + // ECustomAliases + CheckTryFromString("http", CAHttp); + CheckTryFromString("https", CAHttps); + CheckTryFromString("CAItemCount", CAItemCount); + + // EDuplicateKeys + CheckTryFromString("Key0", Key0); + CheckTryFromString("Key0Second", Key0Second); + CheckTryFromString("Key1", Key1); + CheckTryFromString("k2", Key2); + CheckTryFromString("k2.1", Key2); + CheckTryFromString("k3", Key3); + } + + Y_UNIT_TEST(AllNamesValuesTest) { + { + auto allNames = GetEnumAllCppNames<EDuplicateKeys>(); + UNIT_ASSERT(!!allNames); + UNIT_ASSERT_VALUES_EQUAL(allNames.size(), 5u); + UNIT_ASSERT_VALUES_EQUAL(allNames[4], "Key3"); + } + { + auto allNames = GetEnumAllCppNames<ESimpleWithComma>(); + UNIT_ASSERT(!!allNames); + UNIT_ASSERT_VALUES_EQUAL(allNames.size(), 4u); + UNIT_ASSERT_VALUES_EQUAL(allNames[1], "ESimpleWithComma::Http2"); + } + } + + Y_UNIT_TEST(EnumWithHeaderTest) { + UNIT_ASSERT_VALUES_EQUAL(GetEnumItemsCount<EWithHeader>(), 3); + } + + Y_UNIT_TEST(AllNamesValuesWithHeaderTest) { + { + auto allNames = GetEnumAllCppNames<EWithHeader>(); + UNIT_ASSERT_VALUES_EQUAL(allNames.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(allNames.at(2), "HThree"); + } + { + UNIT_ASSERT_VALUES_EQUAL(GetEnumAllNames<EWithHeader>(), "'one', 'HTwo', 'HThree'"); + } + } + + Y_UNIT_TEST(AllValuesTest) { + const auto& allNames = GetEnumNames<EWithHeader>(); + const auto& allValues = GetEnumAllValues<EWithHeader>(); + UNIT_ASSERT_VALUES_EQUAL(allValues.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(allValues[2], HThree); + size_t size = 0; + for (const EWithHeader value : GetEnumAllValues<EWithHeader>()) { + size += 1; + UNIT_ASSERT_VALUES_EQUAL(allNames.contains(value), true); + } + UNIT_ASSERT_VALUES_EQUAL(size, 3u); + } + + Y_UNIT_TEST(EnumNamesTest) { + const auto& names = GetEnumNames<EWithHeader>(); + UNIT_ASSERT_VALUES_EQUAL(names.size(), 3u); + + UNIT_ASSERT(names.contains(HOne)); + UNIT_ASSERT_VALUES_EQUAL(names.at(HOne), "one"); + + UNIT_ASSERT(names.contains(HTwo)); + UNIT_ASSERT_VALUES_EQUAL(names.at(HTwo), "HTwo"); + + UNIT_ASSERT(names.contains(HThree)); + UNIT_ASSERT_VALUES_EQUAL(names.at(HThree), "HThree"); + } + + Y_UNIT_TEST(EnumSerializerDestructionPriority) { + Singleton<TEnumSerializationInitializerHolder>()->Init(); + } +}; diff --git a/tools/enum_parser/parse_enum/ut/enums.h b/tools/enum_parser/parse_enum/ut/enums.h new file mode 100644 index 0000000000..93d835c78d --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/enums.h @@ -0,0 +1,195 @@ +#pragma once +// Sample file for parse_enum unittests + +#include <util/generic/fwd.h> +#include <util/system/compiler.h> + +// Test template declarations +template<class T> +void Func(T&); + +template<> +void Func(struct ENonDeclared&); + +template<class TClass> class TFwdDecl; + +// Test in-function class declarations +void InexistentFunction(struct TFwdStructDecl); +void InexistentFunction2(struct TFwdStructDecl, class TMegaClass); + + +static inline void Func() { + class TLocal { + int M; + public: + void F() { + // to shut up clang + Y_UNUSED(M); + } + }; + + { + // unnamed block + } +} + +// Test forward declarations, pt 2 +namespace NTestContainer { + struct TStruct; +} + +// Enums +enum ESimple { + Http, + Https, + ItemCount +}; + +enum class ESimpleWithComma { + Http = 3, + Http2 = Http, + Https, // 4 + ItemCount, // 5 +}; + +enum ECustomAliases { + CAHttp = 3 /* "http" */, + CAHttps /* "https" */, + CAItemCount, +}; + +enum EMultipleAliases { + MAHttp = 9 /* "http://" "secondary" "old\nvalue" */, + MAHttps = 1 /* "https://" */, + MAItemCount, +}; + +namespace NEnumNamespace { + enum EInNamespace { + Http = 9 /* "http://" "secondary" "old\nvalue" */, + Https = 1 /* "https://" */, + ItemCount /* "real value" */, + }; +}; + +struct TStruct { + int M; +}; + +namespace NEnumNamespace { + class TEnumClass: public TStruct { + public: + enum EInClass { + Http = 9 /* "http://" "secondary" "old\nvalue" */, + Https1 = NEnumNamespace::Https /* "https://" */, + // holy crap, this will work too: + Https3 = 1 /* "https://" */ + 2, + }; + }; +} + +enum { + One, + Two, + Three, +}; + +struct { + int M; +} SomeStruct; + +static inline void f() { + (void)(SomeStruct); + (void)(f); +} + +// buggy case taken from library/cpp/html/face/parstypes.h +enum TEXT_WEIGHT { + WEIGHT_ZERO=-1,// NOINDEX_RELEV + WEIGHT_LOW, // LOW_RELEV + WEIGHT_NORMAL, // NORMAL_RELEV + WEIGHT_HIGH, // HIGH_RELEV (H1,H2,H3,ADDRESS,CAPTION) + WEIGHT_BEST // BEST_RELEV (TITLE) +}; + +// enum with duplicate keys +enum EDuplicateKeys { + Key0 = 0, + Key0Second = Key0, + Key1, + Key2 = 3 /* "k2" "k2.1" */, + Key3 = 3 /* "k3" */, +}; + +enum class EFwdEnum; +void FunctionUsingEFwdEnum(EFwdEnum); +enum class EFwdEnum { + One, + Two +}; + +// empty enum (bug found by sankear@) +enum EEmpty { +}; + +namespace NComposite::NInner { + enum EInCompositeNamespaceSimple { + one, + two = 2, + three, + }; +} + +namespace NOuterSimple { + namespace NComposite::NMiddle::NInner { + namespace NInnerSimple { + class TEnumClass { + public: + enum EVeryDeep { + Key0 = 0, + Key1 = 1, + }; + }; + } + } +} + + +constexpr int func(int value) { + return value; +} + +#define MACRO(x, y) x + +// enum with nonliteral values +enum ENonLiteralValues { + one = MACRO(1, 2), + two = 2, + three = func(3), + four, + five = MACRO(MACRO(1, 2), 2), +}; + +#undef MACRO + + +enum EDestructionPriorityTest { + first, + second +}; + + +enum class NotifyingStatus +{ + NEW = 0, + FAILED_WILL_RETRY = 1, + FAILED_NO_MORE_TRIALS = 2, + SENT = 3 +}; + +/* + * Still unsupported features: + * + * a) Anonymous namespaces (it is parsed correctly, though) + * b) Enums inside template classes (impossible by design) + **/ diff --git a/tools/enum_parser/parse_enum/ut/enums_with_header.h b/tools/enum_parser/parse_enum/ut/enums_with_header.h new file mode 100644 index 0000000000..26fe5565a9 --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/enums_with_header.h @@ -0,0 +1,8 @@ +#pragma once + +enum EWithHeader { + HOne /* "one" */, + HTwo, + HThree, +}; + diff --git a/tools/enum_parser/parse_enum/ut/including_header.h b/tools/enum_parser/parse_enum/ut/including_header.h new file mode 100644 index 0000000000..b3b2a2129f --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/including_header.h @@ -0,0 +1,9 @@ +#pragma once + +#include <util/generic/serialized_enum.h> +#include <tools/enum_parser/parse_enum/ut/enums_with_header.h_serialized.h> + +int TestEnumWithHeader() { + return GetEnumItemsCount<EWithHeader>(); +} + diff --git a/tools/enum_parser/parse_enum/ut/stringlist.cpp b/tools/enum_parser/parse_enum/ut/stringlist.cpp new file mode 100644 index 0000000000..f69d0fc08d --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/stringlist.cpp @@ -0,0 +1 @@ +"qqqq\nqqqqqq", "test:\\string" diff --git a/tools/enum_parser/parse_enum/ut/unbalanced.h b/tools/enum_parser/parse_enum/ut/unbalanced.h new file mode 100644 index 0000000000..9caf54044c --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/unbalanced.h @@ -0,0 +1,4 @@ +#pragma once + +} + diff --git a/tools/enum_parser/parse_enum/ut/ya.make b/tools/enum_parser/parse_enum/ut/ya.make new file mode 100644 index 0000000000..03ace866d4 --- /dev/null +++ b/tools/enum_parser/parse_enum/ut/ya.make @@ -0,0 +1,33 @@ +UNITTEST() + +OWNER( + g:util + mvel +) + +PEERDIR( + ADDINCL tools/enum_parser/parse_enum + library/cpp/resource +) + +SRCDIR(tools/enum_parser/parse_enum) + +RESOURCE( + enums.h /enums + badcode.h /badcode + unbalanced.h /unbalanced + alias_before_name.h /alias_before_name +) + +# self-test +GENERATE_ENUM_SERIALIZATION(enums.h) + +# test GENERATE_ENUM_SERIALIZATION_WITH_HEADER macro +GENERATE_ENUM_SERIALIZATION_WITH_HEADER(enums_with_header.h) + +SRCS( + parse_enum_ut.cpp + enums.cpp +) + +END() diff --git a/tools/enum_parser/parse_enum/ya.make b/tools/enum_parser/parse_enum/ya.make new file mode 100644 index 0000000000..b8d07c66d2 --- /dev/null +++ b/tools/enum_parser/parse_enum/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +OWNER( + g:util + mvel +) + +SRCS( + parse_enum.cpp +) + +PEERDIR( + library/cpp/cppparser +) + +END() |