about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author vvvv <vvvv@yandex-team.ru> 2022-02-24 14:32:47 +0300
committer vvvv <vvvv@yandex-team.ru> 2022-02-24 14:32:47 +0300
commit 5f65edb7fa611063eb57482252762006764f5adf (patch)
tree f3665dc218776368f64ebbafca9986e338fa08cd
parent f31fd59b254383df90ae3e828884f2dd64b6440f (diff)
download ydb-5f65edb7fa611063eb57482252762006764f5adf.tar.gz
YQL-13710 parse type.dat, use type OIDs
ref:bd7993d539afa5d6041856dd58fbcde2e6e24651
-rw-r--r-- ydb/library/yql/parser/pg_catalog/catalog.cpp 148
-rw-r--r-- ydb/library/yql/parser/pg_catalog/catalog.h 25
-rw-r--r-- ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp 70
3 files changed, 204 insertions, 39 deletions
diff --git a/ydb/library/yql/parser/pg_catalog/catalog.cpp b/ydb/library/yql/parser/pg_catalog/catalog.cpp
index 8011878831..abc148fa87 100644
--- a/ydb/library/yql/parser/pg_catalog/catalog.cpp
+++ b/ydb/library/yql/parser/pg_catalog/catalog.cpp
@@ -6,12 +6,14 @@
#include <util/string/split.h>
#include <library/cpp/resource/resource.h>
-namespace NYql {
+namespace NYql::NPg {
using TOperators = THashMap<ui32, TOperDesc>;
using TProcs = THashMap<ui32, TProcDesc>;
+using TTypes = THashMap<ui32, TTypeDesc>;
+
class TParser {
public:
void Do(const TString& dat) {
@@ -24,6 +26,7 @@ public:
};
EState state = EState::WaitBracket;
+ bool AfterBackSlash = false;
TStringBuilder key;
TStringBuilder value;
for (char c : dat) {
@@ -70,6 +73,17 @@ public:
break;
}
case EState::WaitForEndOfValue: {
+ if (c == '\\' && !AfterBackSlash) {
+ AfterBackSlash = true;
+ continue;
+ }
+
+ if (AfterBackSlash) {
+ AfterBackSlash = false;
+ value << c;
+ continue;
+ }
+
if (c != '\'') {
value << c;
continue;
@@ -89,13 +103,16 @@ public:
class TOperatorsParser : public TParser {
public:
- TOperatorsParser(TOperators& operators)
+ TOperatorsParser(TOperators& operators, const THashMap<TString, ui32>& typeByName)
: Operators(operators)
+ , TypeByName(typeByName)
{}
void OnKey(const TString& key, const TString& value) override {
if (key == "oid") {
LastOperator.OperId = FromString<ui32>(value);
+ } else if (key == "oprname") {
+ LastOperator.Name = value;
} else if (key == "oprkind") {
if (value == "r") {
LastOperator.Kind = EOperKind::RightUnary;
@@ -104,33 +121,42 @@ public:
}
} else if (key == "oprleft") {
if (value != "0") {
- LastOperator.LeftType = value;
+ auto typeIdPtr = TypeByName.FindPtr(value);
+ Y_ENSURE(typeIdPtr);
+ LastOperator.LeftType = *typeIdPtr;
}
} else if (key == "oprright") {
if (value != "0") {
- LastOperator.RightType = value;
+ auto typeIdPtr = TypeByName.FindPtr(value);
+ Y_ENSURE(typeIdPtr);
+ LastOperator.RightType = *typeIdPtr;
}
} else if (key == "oprresult") {
- LastOperator.ResultType = value;
+ auto typeIdPtr = TypeByName.FindPtr(value);
+ Y_ENSURE(typeIdPtr);
+ LastOperator.ResultType = *typeIdPtr;
} else if (key == "oprcode") {
LastOperator.Code = value;
}
}
void OnFinish() override {
+ Y_ENSURE(!LastOperator.Name.empty());
Operators[LastOperator.OperId] = LastOperator;
LastOperator = TOperDesc();
}
private:
TOperators& Operators;
+ const THashMap<TString, ui32>& TypeByName;
TOperDesc LastOperator;
};
class TProcsParser : public TParser {
public:
- TProcsParser(TProcs& procs)
+ TProcsParser(TProcs& procs, const THashMap<TString, ui32>& typeByName)
: Procs(procs)
+ , TypeByName(typeByName)
{}
void OnKey(const TString& key, const TString& value) override {
@@ -139,7 +165,9 @@ public:
} else if (key == "provariadic") {
IsSupported = false;
} else if (key == "prorettype") {
- LastProc.ResultType = value;
+ auto idPtr = TypeByName.FindPtr(value);
+ Y_ENSURE(idPtr);
+ LastProc.ResultType = *idPtr;
} else if (key == "proname") {
LastProc.Name = value;
} else if (key == "prosrc") {
@@ -147,7 +175,14 @@ public:
} else if (key == "prolang") {
IsSupported = false;
} else if (key == "proargtypes") {
- Split(value, " ", LastProc.ArgTypes);
+ TVector<TString> strArgs;
+ Split(value, " ", strArgs);
+ LastProc.ArgTypes.reserve(strArgs.size());
+ for (const auto& s : strArgs) {
+ auto idPtr = TypeByName.FindPtr(s);
+ Y_ENSURE(idPtr);
+ LastProc.ArgTypes.push_back(*idPtr);
+ }
} else if (key == "proisstrict") {
LastProc.IsStrict = (value == "t");
} else if (key == "proretset") {
@@ -157,40 +192,97 @@ public:
void OnFinish() override {
if (IsSupported) {
+ Y_ENSURE(!LastProc.Name.empty());
Procs[LastProc.ProcId] = LastProc;
}
IsSupported = true;
+ LastProc = TProcDesc();
}
private:
TProcs& Procs;
+ const THashMap<TString, ui32>& TypeByName;
TProcDesc LastProc;
bool IsSupported = true;
};
-TOperators ParseOperators(const TString& dat) {
+class TTypesParser : public TParser {
+public:
+ TTypesParser(TTypes& types)
+ : Types(types)
+ {}
+
+ void OnKey(const TString& key, const TString& value) override {
+ if (key == "oid") {
+ LastType.TypeId = FromString<ui32>(value);
+ } else if (key == "array_type_oid") {
+ LastType.ArrayTypeId = FromString<ui32>(value);
+ } else if (key == "typname") {
+ LastType.Name = value;
+ } else if (key == "typelem") {
+ LastType.ElementType = value;
+ }
+ }
+
+ void OnFinish() override {
+ Y_ENSURE(!LastType.Name.empty());
+ Types[LastType.TypeId] = LastType;
+ if (LastType.ArrayTypeId) {
+ Types[LastType.ArrayTypeId] = LastType;
+ }
+
+ LastType = TTypeDesc();
+ }
+
+private:
+ TTypes& Types;
+ TTypeDesc LastType;
+};
+
+TOperators ParseOperators(const TString& dat, const THashMap<TString, ui32>& typeByName) {
TOperators ret;
- TOperatorsParser parser(ret);
+ TOperatorsParser parser(ret, typeByName);
parser.Do(dat);
return ret;
}
-TProcs ParseProcs(const TString& dat) {
+TProcs ParseProcs(const TString& dat, const THashMap<TString, ui32>& typeByName) {
TProcs ret;
- TProcsParser parser(ret);
+ TProcsParser parser(ret, typeByName);
+ parser.Do(dat);
+ return ret;
+}
+
+TTypes ParseTypes(const TString& dat) {
+ TTypes ret;
+ TTypesParser parser(ret);
parser.Do(dat);
return ret;
}
struct TCatalog {
TCatalog() {
+ TString typeData;
+ Y_ENSURE(NResource::FindExact("pg_type.dat", &typeData));
TString opData;
Y_ENSURE(NResource::FindExact("pg_operator.dat", &opData));
TString procData;
Y_ENSURE(NResource::FindExact("pg_proc.dat", &procData));
- Operators = ParseOperators(opData);
- Procs = ParseProcs(procData);
+ Types = ParseTypes(typeData);
+ for (const auto&[k, v] : Types) {
+ if (k == v.TypeId) {
+ Y_ENSURE(TypeByName.insert(std::make_pair(v.Name, k)).second);
+ }
+
+ if (k == v.ArrayTypeId) {
+ Y_ENSURE(TypeByName.insert(std::make_pair("_" + v.Name, k)).second);
+ }
+ }
+
+ Operators = ParseOperators(opData, TypeByName);
+ Procs = ParseProcs(procData, TypeByName);
+
for (const auto& [k, v]: Procs) {
ProcByName[v.Name].push_back(k);
}
@@ -202,10 +294,12 @@ struct TCatalog {
TOperators Operators;
TProcs Procs;
+ TTypes Types;
THashMap<TString, TVector<ui32>> ProcByName;
+ THashMap<TString, ui32> TypeByName;
};
-const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argTypes) {
+const TProcDesc& LookupProc(const TString& name, const TVector<ui32>& argTypeIds) {
const auto& catalog = TCatalog::Instance();
auto procIdPtr = catalog.ProcByName.FindPtr(name);
if (!procIdPtr) {
@@ -215,7 +309,7 @@ const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argType
for (const auto& id : *procIdPtr) {
const auto& d = catalog.Procs.FindPtr(id);
Y_ENSURE(d);
- if (d->ArgTypes != argTypes) {
+ if (argTypeIds != d->ArgTypes) {
continue;
}
@@ -225,4 +319,26 @@ const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argType
throw yexception() << "Unable to find an overload for function " << name << " with given argument types";
}
+const TTypeDesc& LookupType(const TString& name) {
+ const auto& catalog = TCatalog::Instance();
+ auto typeIdPtr = catalog.TypeByName.FindPtr(name);
+ if (!typeIdPtr) {
+ throw yexception() << "No such type: " << name;
+ }
+
+ auto typePtr = catalog.Types.FindPtr(*typeIdPtr);
+ Y_ENSURE(typePtr);
+ return *typePtr;
+}
+
+const TTypeDesc& LookupType(ui32 typeId) {
+ const auto& catalog = TCatalog::Instance();
+ auto typePtr = catalog.Types.FindPtr(typeId);
+ if (!typePtr) {
+ throw yexception() << "No such type: " << typeId;
+ }
+
+ return *typePtr;
+}
+
}
diff --git a/ydb/library/yql/parser/pg_catalog/catalog.h b/ydb/library/yql/parser/pg_catalog/catalog.h
index cc92b5cc9b..b4ed9802d9 100644
--- a/ydb/library/yql/parser/pg_catalog/catalog.h
+++ b/ydb/library/yql/parser/pg_catalog/catalog.h
@@ -3,7 +3,7 @@
#include <util/generic/string.h>
#include <util/generic/vector.h>
-namespace NYql {
+namespace NYql::NPg {
enum class EOperKind {
Binary,
@@ -13,10 +13,11 @@ enum class EOperKind {
struct TOperDesc {
ui32 OperId = 0;
+ TString Name;
EOperKind Kind = EOperKind::Binary;
- TString LeftType;
- TString RightType;
- TString ResultType;
+ ui32 LeftType = 0;
+ ui32 RightType = 0;
+ ui32 ResultType = 0;
TString Code;
};
@@ -24,11 +25,21 @@ struct TProcDesc {
ui32 ProcId = 0;
TString Name;
TString Src;
- TVector<TString> ArgTypes;
- TString ResultType;
+ TVector<ui32> ArgTypes;
+ ui32 ResultType = 0;
bool IsStrict = true;
};
-const TProcDesc& LookupProc(const TString& name, const TVector<TString>& argTypes);
+struct TTypeDesc {
+ ui32 TypeId = 0;
+ ui32 ArrayTypeId = 0;
+ TString Name;
+ TString ElementType;
+};
+
+const TProcDesc& LookupProc(const TString& name, const TVector<ui32>& argTypeIds);
+
+const TTypeDesc& LookupType(const TString& name);
+const TTypeDesc& LookupType(ui32 typeId);
}
diff --git a/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp b/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp
index 719a90c813..bf3b3aa360 100644
--- a/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp
+++ b/ydb/library/yql/parser/pg_catalog/ut/catalog_ut.cpp
@@ -2,38 +2,76 @@
#include <library/cpp/testing/unittest/registar.h>
-using namespace NYql;
+using namespace NYql::NPg;
+
+const TProcDesc& LookupProcByStrArgTypes(const TString& name, const TVector<TString>& argTypes) {
+ TVector<ui32> argTypeIds;
+ argTypeIds.reserve(argTypes.size());
+ for (const auto& a : argTypes) {
+ argTypeIds.push_back(LookupType(a).TypeId);
+ }
+
+ return LookupProc(name, argTypeIds);
+}
+
+Y_UNIT_TEST_SUITE(TTypesTests) {
+ Y_UNIT_TEST(TestMissing) {
+ UNIT_ASSERT_EXCEPTION(LookupType("_foo_bar_"), yexception);
+ UNIT_ASSERT_EXCEPTION(LookupType(0), yexception);
+ }
+
+ Y_UNIT_TEST(TestOk) {
+ auto ret = LookupType("text");
+ UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 25);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1009);
+ UNIT_ASSERT_VALUES_EQUAL(ret.Name, "text");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, "");
+
+ ret = LookupType("point");
+ UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 600);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1017);
+ UNIT_ASSERT_VALUES_EQUAL(ret.Name, "point");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, "float8");
+
+ ret = LookupType(1009);
+ UNIT_ASSERT_VALUES_EQUAL(ret.TypeId, 25);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArrayTypeId, 1009);
+ UNIT_ASSERT_VALUES_EQUAL(ret.Name, "text");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ElementType, "");
+ }
+}
Y_UNIT_TEST_SUITE(TFunctionsTests) {
Y_UNIT_TEST(TestMissing) {
- UNIT_ASSERT_EXCEPTION(LookupProc("_foo_bar_", {}), yexception);
+ UNIT_ASSERT_EXCEPTION(LookupProcByStrArgTypes("_foo_bar_", {}), yexception);
}
Y_UNIT_TEST(TestMismatchArgTypes) {
- UNIT_ASSERT_EXCEPTION(LookupProc("int4pl", {}), yexception);
+ UNIT_ASSERT_EXCEPTION(LookupProcByStrArgTypes("int4pl", {}), yexception);
}
Y_UNIT_TEST(TestOk) {
- auto ret = LookupProc("int4pl", {"int4", "int4"});
- UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "int4");
+ auto ret = LookupProcByStrArgTypes("int4pl", {"int4", "int4"});
+ UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("int4").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 2);
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "int4");
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("int4").TypeId);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.Src, "int4pl");
- ret = LookupProc("substring", {"text", "int4", "int4"});
- UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "text");
+ ret = LookupProcByStrArgTypes("substring", {"text", "int4", "int4"});
+ UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("text").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 3);
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "text");
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4");
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[2], "int4");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("text").TypeId);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[2], LookupType("int4").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.Src, "text_substr");
- ret = LookupProc("substring", {"text", "int4"});
- UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, "text");
+ ret = LookupProcByStrArgTypes("substring", {"text", "int4"});
+ UNIT_ASSERT_VALUES_EQUAL(ret.ResultType, LookupType("text").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes.size(), 2);
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], "text");
- UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], "int4");
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[0], LookupType("text").TypeId);
+ UNIT_ASSERT_VALUES_EQUAL(ret.ArgTypes[1], LookupType("int4").TypeId);
UNIT_ASSERT_VALUES_EQUAL(ret.Src, "text_substr_no_len");
}
}
+