#include "parse_enum.h"
#include <library/cpp/cppparser/parser.h>
#include <util/stream/file.h>
#include <util/stream/output.h>
#include <util/stream/input.h>
#include <util/stream/mem.h>
#include <util/charset/wide.h>
#include <util/string/strip.h>
#include <util/string/cast.h>
#include <util/generic/map.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/generic/ptr.h>
#include <util/generic/yexception.h>
/**
* Parse C-style strings inside multiline comments
**/
class TValuesContext: public TCppFullSax {
public:
void DoString(const TText& text) override {
Values.push_back(text.Data);
}
~TValuesContext() override {
}
TVector<TString> Values;
};
static TVector<TString> ParseEnumValues(const TString& strValues) {
TVector<TString> result;
TValuesContext ctx;
TCppSaxParser parser(&ctx);
TMemoryInput in(strValues.data(), strValues.size());
TransferData(static_cast<IInputStream*>(&in), &parser);
parser.Finish();
for (const auto& value : ctx.Values) {
Y_ENSURE(value.size() >= 2, "Invalid C-style string. ");
TString dequoted = value.substr(1, value.size() - 2);
// TODO: support C-unescaping
result.push_back(dequoted);
}
return result;
}
/**
* Parse C++ fragment with one enum
**/
class TEnumContext: public TCppFullSax {
public:
typedef TEnumParser::TItem TItem;
typedef TEnumParser::TEnum TEnum;
TEnumContext(TEnum& currentEnum)
: CurrentEnum(currentEnum)
{
}
~TEnumContext() override {
}
void AddEnumItem() {
if (!CurrentItem.CppName) {
// uninitialized element should have no value too
Y_ASSERT(!CurrentItem.Value.Defined());
return;
}
// enum item C++ name should not be empty
Y_ASSERT(CurrentItem.CppName);
CurrentItem.NormalizeValue();
CurrentEnum.Items.push_back(CurrentItem);
CurrentItem.Clear();
InEnumState = Begin;
}
template<class T>
void AppendValue(const T& text) {
// by pg@ advice, do not parse enum value
// leave it to C++ compiler to parse/interpret
if (!CurrentItem.Value)
CurrentItem.Value = TString();
*CurrentItem.Value += text;
}
void DoEnd() override {
AddEnumItem();
}
void DoWhiteSpace(const TText& text) override {
if (InValue == InEnumState || InValueCall == InEnumState) {
AppendValue(text.Data);
}
}
void DoSyntax(const TText& text) override {
// For some reason, parser sometimes passes chunks like '{};' here,
// so we handle each symbol separately.
for (const char& sym : text.Data) {
if ('{' == sym && InValue != InEnumState && InValueCall != InEnumState) {
BodyDetected = true;
continue;
} else if ('=' == sym && InValueCall != InEnumState) {
InEnumState = InValue;
continue;
} else if (('(' == sym || '{' == sym) && (InValue == InEnumState || InValueCall == InEnumState)) {
// there may be constexpr function / constructor / macro call in value part,
// handle them appropriately
InEnumState = InValueCall;
++BracesBalance;
AppendValue(sym);
continue;
} else if ((')' == sym || '}' == sym) && InValueCall == InEnumState) {
if (!--BracesBalance) {
InEnumState = InValue;
}
AppendValue(sym);
continue;
} else if ((',' == sym || '}' == sym) && InValueCall != InEnumState) {
AddEnumItem();
continue;
} else if (InValue == InEnumState || InValueCall == InEnumState) {
AppendValue(sym);
}
}
}
void DoName(const TText& text) override {
if (!BodyDetected) {
return;
}
if (InValue == InEnumState || InValueCall == InEnumState) {
AppendValue(text.Data);
return;
}
CurrentItem.CppName = text.Data;
InEnumState = AfterCppName;
}
void DoKeyword(const TText& text) override {
if (InValue == InEnumState || InValueCall == InEnumState) {
AppendValue(text.Data);
return;
}
}
void DoCharacter(const TText& text) override {
if (InValue == InEnumState || InValueCall == InEnumState) {
AppendValue(text.Data);
return;
}
}
void DoMultiLineComment(const TText& text) override {
Y_ENSURE(text.Data.size() >= 4, "Invalid multiline comment " << text.Data.Quote() << ". ");
TString commentText = text.Data.substr(2, text.Data.size() - 4);
commentText = StripString(commentText);
CurrentItem.CommentText = commentText;
CurrentItem.Aliases = ParseEnumValues(commentText);
if (CurrentItem.Aliases && !CurrentItem.CppName) {
// this means we process multiline comment when item name was not set yet.
ythrow yexception() << "Are you hit with https://clubs.at.yandex-team.ru/stackoverflow/2603 typo? ";
}
}
bool BodyDetected = false;
enum EInEnumState {
Begin,
AfterCppName,
InValue,
InValueCall,
End,
};
EInEnumState InEnumState = Begin;
TEnum& CurrentEnum;
TItem CurrentItem;
size_t BracesBalance = 0;
};
/**
* Parse C++ file
**/
class TCppContext: public TCppFullSax {
public:
typedef TEnumParser::TScope TScope;
typedef TEnumParser::TItem TItem;
typedef TEnumParser::TEnum TEnum;
typedef TEnumParser::TEnums TEnums;
const TString NAMESPACE = "<namespace>";
const TString CLASS = "<class>";
const TString STRUCT = "<struct>";
const TString ENUM = "<enum>";
const TString BLOCK = "<block>";
TCppContext(const char* data, const TString& sourceFileName = TString())
: Data(data)
, SourceFileName(sourceFileName)
{
}
~TCppContext() override {
}
void DoSyntax(const TText& text) override {
// For some reason, parser sometimes passes chunks like '{};' here,
// so we handle each symbol separately.
const TString& syn = text.Data;
if (syn == "::" && InCompositeNamespace) {
LastScope += syn;
InCompositeNamespace = false;
ScopeDeclaration = true;
return;
}
for (size_t i = 0; i < syn.size(); ++i) {
if ('{' == syn[i]) {
OnEnterScope(text.Offset + i);
if (InEnum) {
CurrentEnum.BodyDetected = true;
}
} else if ('}' == syn[i]) {
OnLeaveScope(text.Offset + i);
} else if (';' == syn[i]) {
// Handle SEARCH-1392
if (InEnum && !CurrentEnum.BodyDetected) {
CurrentEnum.ForwardDeclaration = true;
InEnum = false;
}
}
}
}
void DoKeyword(const TText& text) override {
if (text.Data == "enum") {
Y_ENSURE(!InEnum, "Enums cannot be nested. ");
InEnum = true;
EnumPos = text.Offset;
CurrentEnum.Clear();
CurrentEnum.Scope = Scope;
ScopeDeclaration = true;
NextScopeName = ENUM;
//PrintScope();
} else if (text.Data == "class") {
if (InEnum) {
CurrentEnum.EnumClass = true;
return;
}
NextScopeName = CLASS;
ScopeDeclaration = true;
//PrintScope();
} else if (text.Data == "struct") {
if (InEnum) {
CurrentEnum.EnumClass = true;
return;
}
NextScopeName = STRUCT;
ScopeDeclaration = true;
//PrintScope();
} else if (text.Data == "namespace") {
NextScopeName = NAMESPACE;
LastScope.clear();
ScopeDeclaration = true;
//PrintScope();
}
}
void DoName(const TText& text) override {
if (!ScopeDeclaration) {
return;
}
if (InEnum) {
CurrentEnum.CppName = text.Data;
} else {
if (NextScopeName == NAMESPACE) {
InCompositeNamespace = true;
LastScope += text.Data;
} else {
LastScope = text.Data;
}
}
ScopeDeclaration = false;
}
void OnEnterScope(size_t /* offset */) {
if (ScopeDeclaration) {
// unnamed declaration or typedef
ScopeDeclaration = false;
}
InCompositeNamespace = false;
Scope.push_back(LastScope);
LastScope.clear();
//PrintScope();
}
/// @param offset: terminating curly brace position
void OnLeaveScope(size_t offset) {
if (!Scope) {
size_t contextOffsetBegin = (offset >= 256) ? offset - 256 : 0;
TString codeContext = TString(Data + contextOffsetBegin, offset - contextOffsetBegin + 1);
ythrow yexception() << "C++ source parse failed: unbalanced scope. Did you miss a closing '}' bracket? "
"Context: enum " << CurrentEnum.CppName.Quote() <<
" in scope " << TEnumParser::ScopeStr(CurrentEnum.Scope).Quote() << ". Code context:\n... " <<
codeContext << " ...";
}
Scope.pop_back();
if (InEnum) {
Y_ASSERT(offset > EnumPos);
InEnum = false;
try {
ParseEnum(Data + EnumPos, offset - EnumPos + 1);
} catch (...) {
TString ofFile;
if (SourceFileName) {
ofFile += " of file ";
ofFile += SourceFileName.Quote();
}
ythrow yexception() << "Failed to parse enum " << CurrentEnum.CppName <<
" in scope " << TEnumParser::ScopeStr(CurrentEnum.Scope) << ofFile <<
"\n<C++ parser error message>: " << CurrentExceptionMessage();
}
}
//PrintScope();
}
void ParseEnum(const char* data, size_t length) {
TEnumContext enumContext(CurrentEnum);
TMemoryInput in(data, length);
TCppSaxParser parser(&enumContext);
TransferData(&in, &parser);
parser.Finish();
//PrintEnum(CurrentEnum);
Enums.push_back(CurrentEnum);
}
// Some debug stuff goes here
static void PrintScope(const TScope& scope) {
Cerr << "Current scope: " << TEnumParser::ScopeStr(scope) << Endl;
}
void PrintScope() {
PrintScope(Scope);
}
void PrintEnum(const TEnum& en) {
Cerr << "Enum within scope " << TEnumParser::ScopeStr(en.Scope).Quote() << Endl;
for (const auto& item : en.Items) {
Cerr << " " << item.CppName;
if (item.Value)
Cerr << " = " << *item.Value;
Cerr << Endl;
for (const auto& value : item.Aliases) {
Cerr << " " << value << Endl;
}
}
}
void PrintEnums() {
for (const auto& en : Enums)
PrintEnum(en);
}
public:
TScope Scope;
TEnums Enums;
private:
const char* const Data;
TString SourceFileName;
bool InEnum = false;
bool ScopeDeclaration = false;
bool InCompositeNamespace = false;
TString NextScopeName = BLOCK;
TString LastScope;
size_t EnumPos = 0;
TEnum CurrentEnum;
};
TEnumParser::TEnumParser(const TString& fileName) {
THolder<IInputStream> hIn;
IInputStream* in = nullptr;
if (fileName != "-") {
SourceFileName = fileName;
hIn.Reset(new TFileInput(fileName));
in = hIn.Get();
} else {
in = &Cin;
}
TString contents = in->ReadAll();
Parse(contents.data(), contents.size());
}
TEnumParser::TEnumParser(const char* data, size_t length) {
Parse(data, length);
}
TEnumParser::TEnumParser(IInputStream& in) {
TString contents = in.ReadAll();
Parse(contents.data(), contents.size());
}
void TEnumParser::Parse(const char* dataIn, size_t lengthIn) {
TMemoryInput mi(dataIn, lengthIn);
TString line;
TString result;
while (mi.ReadLine(line)) {
if (line.find("if (GetOwningArena() == other->GetOwningArena()) {") == TString::npos) {
result += line;
result += "\n";
}
}
const char* data = result.c_str();
size_t length = result.length();
const TStringBuf span(data, length);
const bool hasPragmaOnce = span.Contains("#pragma once");
const bool isProtobufHeader = span.Contains("// Generated by the protocol buffer compiler");
const bool isFlatbuffersHeader = span.Contains("// automatically generated by the FlatBuffers compiler");
Y_ENSURE(
hasPragmaOnce || isProtobufHeader || isFlatbuffersHeader,
"Serialization functions can be generated only for enums in header files. "
"A valid header should either contain `#pragma once` or be an protobuf/flatbuf autogenerated header file. "
"See SEARCH-975 for more information. "
);
TCppContext cppContext(data, SourceFileName);
TMemoryInput in(data, length);
TCppSaxParser parser(&cppContext);
TransferData(&in, &parser);
parser.Finish();
// obtain result
Enums = cppContext.Enums;
if (cppContext.Scope) {
cppContext.PrintEnums();
cppContext.PrintScope();
ythrow yexception() << "Unbalanced scope, something is wrong with enum parser. ";
}
}