diff options
author | vvvv <vvvv@yandex-team.ru> | 2022-02-24 14:32:47 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.ru> | 2022-02-24 14:32:47 +0300 |
commit | 5f65edb7fa611063eb57482252762006764f5adf (patch) | |
tree | f3665dc218776368f64ebbafca9986e338fa08cd | |
parent | f31fd59b254383df90ae3e828884f2dd64b6440f (diff) | |
download | ydb-5f65edb7fa611063eb57482252762006764f5adf.tar.gz |
YQL-13710 parse type.dat, use type OIDs
ref:bd7993d539afa5d6041856dd58fbcde2e6e24651
-rw-r--r-- | ydb/library/yql/parser/pg_catalog/catalog.cpp | 148 | ||||
-rw-r--r-- | ydb/library/yql/parser/pg_catalog/catalog.h | 25 | ||||
-rw-r--r-- | ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp | 70 |
3 files changed, 204 insertions, 39 deletions
diff --git a/ydb/library/yql/parser/pg_catalog/catalog.cpp b/ydb/library/yql/parser/pg_catalog/catalog.cpp index 8011878831..abc148fa87 100644 --- a/ydb/library/yql/parser/pg_catalog/catalog.cpp +++ b/ydb/library/yql/parser/pg_catalog/catalog.cpp @@ -6,12 +6,14 @@ #include <util/string/split.h> #include <library/cpp/resource/resource.h> -namespace NYql { +namespace NYql::NPg { using TOperators = THashMap<ui32, TOperDesc>; using TProcs = THashMap<ui32, TProcDesc>; +using TTypes = THashMap<ui32, TTypeDesc>; + class TParser { public: void Do(const TString& dat) { @@ -24,6 +26,7 @@ public: }; EState state = EState::WaitBracket; + bool AfterBackSlash = false; TStringBuilder key; TStringBuilder value; for (char c : dat) { @@ -70,6 +73,17 @@ public: break; } case EState::WaitForEndOfValue: { + if (c == '\\' && !AfterBackSlash) { + AfterBackSlash = true; + continue; + } + + if (AfterBackSlash) { + AfterBackSlash = false; + value << c; + continue; + } + if (c != '\'') { value << c; continue; @@ -89,13 +103,16 @@ public: class TOperatorsParser : public TParser { public: - TOperatorsParser(TOperators& operators) + TOperatorsParser(TOperators& operators, const THashMap<TString, ui32>& typeByName) : Operators(operators) + , TypeByName(typeByName) {} void OnKey(const TString& key, const TString& value) override { if (key == "oid") { LastOperator.OperId = FromString<ui32>(value); + } else if (key == "oprname") { + LastOperator.Name = value; } else if (key == "oprkind") { if (value == "r") { LastOperator.Kind = EOperKind::RightUnary; @@ -104,33 +121,42 @@ public: } } else if (key == "oprleft") { if (value != "0") { - LastOperator.LeftType = value; + auto typeIdPtr = TypeByName.FindPtr(value); + Y_ENSURE(typeIdPtr); + LastOperator.LeftType = *typeIdPtr; } } else if (key == "oprright") { if (value != "0") { - LastOperator.RightType = value; + auto typeIdPtr = TypeByName.FindPtr(value); + Y_ENSURE(typeIdPtr); + LastOperator.RightType = *typeIdPtr; } } else if (key == "oprresult") { - LastOperator.ResultType = value; + auto typeIdPtr = TypeByName.FindPtr(value); + Y_ENSURE(typeIdPtr); + LastOperator.ResultType = *typeIdPtr; } else if (key == "oprcode") { LastOperator.Code = value; } } void OnFinish() override { + Y_ENSURE(!LastOperator.Name.empty()); Operators[LastOperator.OperId] = LastOperator; LastOperator = TOperDesc(); } private: TOperators& Operators; + const THashMap<TString, ui32>& TypeByName; TOperDesc LastOperator; }; class TProcsParser : public TParser { public: - TProcsParser(TProcs& procs) + TProcsParser(TProcs& procs, const THashMap<TString, ui32>& typeByName) : Procs(procs) + , TypeByName(typeByName) {} void OnKey(const TString& key, const TString& value) override { @@ -139,7 +165,9 @@ public: } else if (key == "provariadic") { IsSupported = false; } else if (key == "prorettype") { - LastProc.ResultType = value; + auto idPtr = TypeByName.FindPtr(value); + Y_ENSURE(idPtr); + LastProc.ResultType = *idPtr; } else if (key == "proname") { LastProc.Name = value; } else if (key == "prosrc") { @@ -147,7 +175,14 @@ public: } else if (key == "prolang") { IsSupported = false; } else if (key == "proargtypes") { - Split(value, " ", LastProc.ArgTypes); + TVector<TString> strArgs; + Split(value, " ", strArgs); + LastProc.ArgTypes.reserve(strArgs.size()); + for (const auto& s : strArgs) { + auto idPtr = TypeByName.FindPtr(s); + Y_ENSURE(idPtr); + LastProc.ArgTypes.push_back(*idPtr); + } } else if (key == "proisstrict") { LastProc.IsStrict = (value == "t"); } else if (key == "proretset") { @@ -157,40 +192,97 @@ public: void OnFinish() override { if (IsSupported) { + Y_ENSURE(!LastProc.Name.empty()); Procs[LastProc.ProcId] = LastProc; } IsSupported = true; + LastProc = TProcDesc(); } private: TProcs& Procs; + const THashMap<TString, ui32>& TypeByName; TProcDesc LastProc; bool IsSupported = true; }; -TOperators ParseOperators(const TString& dat) { +class TTypesParser : public TParser { +public: + TTypesParser(TTypes& types) + : Types(types) + {} + + void OnKey(const TString& key, const TString& value) override { + if (key == "oid") { + LastType.TypeId = FromString<ui32>(value); + } else if (key == "array_type_oid") { + LastType.ArrayTypeId = FromString<ui32>(value); + } else if (key == "typname") { + LastType.Name = value; + } else if (key == "typelem") { + LastType.ElementType = value; + } + } + + void OnFinish() override { + Y_ENSURE(!LastType.Name.empty()); + Types[LastType.TypeId] = LastType; + if (LastType.ArrayTypeId) { + Types[LastType.ArrayTypeId] = LastType; + } + + LastType = TTypeDesc(); + } + +private: + TTypes& Types; + TTypeDesc LastType; +}; + +TOperators ParseOperators(const TString& dat, const THashMap<TString, ui32>& typeByName) { TOperators ret; - TOperatorsParser parser(ret); + TOperatorsParser parser(ret, typeByName); parser.Do(dat); return ret; } -TProcs ParseProcs(const TString& dat) { +TProcs ParseProcs(const TString& dat, const THashMap<TString, ui32>& typeByName) { TProcs ret; - TProcsParser parser(ret); + TProcsParser parser(ret, typeByName); + parser.Do(dat); + return ret; +} + +TTypes ParseTypes(const TString& dat) { + TTypes ret; + TTypesParser parser(ret); parser.Do(dat); return ret; } struct TCatalog { TCatalog() { + TString typeData; + Y_ENSURE(NResource::FindExact("pg_type.dat", &typeData)); TString opData; Y_ENSURE(NResource::FindExact("pg_operator.dat", &opData)); TString procData; Y_ENSURE(NResource::FindExact("pg_proc.dat", &procData)); - Operators = ParseOperators(opData); - Procs = ParseProcs(procData); + Types = ParseTypes(typeData); + for (const auto&[k, v] : Types) { + if (k == v.TypeId) { + Y_ENSURE(TypeByName.insert(std::make_pair(v.Name, k)).second); + } + + if (k == v.ArrayTypeId) { + Y_ENSURE(TypeByName.insert(std::make_pair("_" + v.Name, k)).second); + } + } + + Operators = ParseOperators(opData, TypeByName); + Procs = ParseProcs(procData, TypeByName); + for (const auto& [k, v]: Procs) { ProcByName[v.Name].push_back(k); } @@ -202,10 +294,12 @@ struct TCatalog { TOperators Operators; TProcs Procs; + TTypes Types; THashMap<TString, TVector<ui32>> ProcByName; + THashMap<TString, ui32> TypeByName; }; -const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argTypes) { +const TProcDesc& LookupProc(const TString& name, const TVector<ui32>& argTypeIds) { const auto& catalog = TCatalog::Instance(); auto procIdPtr = catalog.ProcByName.FindPtr(name); if (!procIdPtr) { @@ -215,7 +309,7 @@ const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argType for (const auto& id : *procIdPtr) { const auto& d = catalog.Procs.FindPtr(id); Y_ENSURE(d); - if (d->ArgTypes != argTypes) { + if (argTypeIds != d->ArgTypes) { continue; } @@ -225,4 +319,26 @@ const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argType throw yexception() << "Unable to find an overload for function " << name << " with given argument types"; } +const TTypeDesc& LookupType(const TString& name) { + const auto& catalog = TCatalog::Instance(); + auto typeIdPtr = catalog.TypeByName.FindPtr(name); + if (!typeIdPtr) { + throw yexception() << "No such type: " << name; + } + + auto typePtr = catalog.Types.FindPtr(*typeIdPtr); + Y_ENSURE(typePtr); + return *typePtr; +} + +const TTypeDesc& LookupType(ui32 typeId) { + const auto& catalog = TCatalog::Instance(); + auto typePtr = catalog.Types.FindPtr(typeId); + if (!typePtr) { + throw yexception() << "No such type: " << typeId; + } + + return *typePtr; +} + } diff --git a/ydb/library/yql/parser/pg_catalog/catalog.h b/ydb/library/yql/parser/pg_catalog/catalog.h index cc92b5cc9b..b4ed9802d9 100644 --- a/ydb/library/yql/parser/pg_catalog/catalog.h +++ b/ydb/library/yql/parser/pg_catalog/catalog.h @@ -3,7 +3,7 @@ #include <util/generic/string.h> #include <util/generic/vector.h> -namespace NYql { +namespace NYql::NPg { enum class EOperKind { Binary, @@ -13,10 +13,11 @@ enum class EOperKind { struct TOperDesc { ui32 OperId = 0; + TString Name; EOperKind Kind = EOperKind::Binary; - TString LeftType; - TString RightType; - TString ResultType; + ui32 LeftType = 0; + ui32 RightType = 0; + ui32 ResultType = 0; TString Code; }; @@ -24,11 +25,21 @@ struct TProcDesc { ui32 ProcId = 0; TString Name; TString Src; - TVector<TString> ArgTypes; - TString ResultType; + TVector<ui32> ArgTypes; + ui32 ResultType = 0; bool IsStrict = true; }; -const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argTypes); +struct TTypeDesc { + ui32 TypeId = 0; + ui32 ArrayTypeId = 0; + TString Name; + TString ElementType; +}; + +const TProcDesc& LookupProc(const TString& name, const TVector<ui32>& argTypeIds); + +const TTypeDesc& LookupType(const TString& name); +const TTypeDesc& LookupType(ui32 typeId); } diff --git a/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp b/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp index 719a90c813..bf3b3aa360 100644 --- a/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp +++ b/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp @@ -2,38 +2,76 @@ #include <library/cpp/testing/unittest/registar.h> -using namespace NYql; +using namespace NYql::NPg; + +const TProcDesc& LookupProcByStrArgTypes(const TString& name, const TVector<TString>& argTypes) { + TVector<ui32> argTypeIds; + argTypeIds.reserve(argTypes.size()); + for (const auto& a : argTypes) { + argTypeIds.push_back(LookupType(a).TypeId); + } + + return LookupProc(name, argTypeIds); +} + +Y_UNIT_TEST_SUITE(TTypesTests) { + Y_UNIT_TEST(TestMissing) { + UNIT_ASSERT_EXCEPTION(LookupType("_foo_bar_"), yexception); + UNIT_ASSERT_EXCEPTION(LookupType(0), yexception); + } + + Y_UNIT_TEST(TestOk) { + auto ret = LookupType("text"); + UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 25); + UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1009); + UNIT_ASSERT_VALUES_EQUAL(ret.Name, "text"); + UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, ""); + + ret = LookupType("point"); + UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 600); + UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1017); + UNIT_ASSERT_VALUES_EQUAL(ret.Name, "point"); + UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, "float8"); + + ret = LookupType(1009); + UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 25); + UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1009); + UNIT_ASSERT_VALUES_EQUAL(ret.Name, "text"); + UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, ""); + } +} Y_UNIT_TEST_SUITE(TFunctionsTests) { Y_UNIT_TEST(TestMissing) { - UNIT_ASSERT_EXCEPTION(LookupProc("_foo_bar_", {}), yexception); + UNIT_ASSERT_EXCEPTION(LookupProcByStrArgTypes("_foo_bar_", {}), yexception); } Y_UNIT_TEST(TestMismatchArgTypes) { - UNIT_ASSERT_EXCEPTION(LookupProc("int4pl", {}), yexception); + UNIT_ASSERT_EXCEPTION(LookupProcByStrArgTypes("int4pl", {}), yexception); } Y_UNIT_TEST(TestOk) { - auto ret = LookupProc("int4pl", {"int4", "int4"}); - UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "int4"); + auto ret = LookupProcByStrArgTypes("int4pl", {"int4", "int4"}); + UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("int4").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "int4"); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4"); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("int4").TypeId); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.Src, "int4pl"); - ret = LookupProc("substring", {"text", "int4", "int4"}); - UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "text"); + ret = LookupProcByStrArgTypes("substring", {"text", "int4", "int4"}); + UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("text").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 3); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "text"); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4"); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[2], "int4"); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("text").TypeId); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[2], LookupType("int4").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.Src, "text_substr"); - ret = LookupProc("substring", {"text", "int4"}); - UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "text"); + ret = LookupProcByStrArgTypes("substring", {"text", "int4"}); + UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("text").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "text"); - UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4"); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("text").TypeId); + UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId); UNIT_ASSERT_VALUES_EQUAL(ret.Src, "text_substr_no_len"); } } + |