aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2025-03-05 08:40:22 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2025-03-05 08:53:10 +0300
commit78e8655f82daee0ed66a7018c578039f834a39ef (patch)
tree175156e9c67fc68f9649df4da2f14a2afb6c7be1
parente461a7c94c01378bb9578550ab81ef03e340d89d (diff)
downloadydb-78e8655f82daee0ed66a7018c578039f834a39ef.tar.gz
Intermediate changes
commit_hash:db50ded25b1584b755040ad43d8d964fe43cd87a
-rw-r--r--yql/essentials/udfs/language/yql/test/canondata/result.json15
-rw-r--r--yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt55
-rw-r--r--yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt104
-rw-r--r--yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt104
-rw-r--r--yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql2
-rw-r--r--yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql9
-rw-r--r--yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql9
-rw-r--r--yql/essentials/udfs/language/yql/yql_language_udf.cpp114
8 files changed, 409 insertions, 3 deletions
diff --git a/yql/essentials/udfs/language/yql/test/canondata/result.json b/yql/essentials/udfs/language/yql/test/canondata/result.json
index ad20b6aaa14..838e46edd2a 100644
--- a/yql/essentials/udfs/language/yql/test/canondata/result.json
+++ b/yql/essentials/udfs/language/yql/test/canondata/result.json
@@ -1,4 +1,19 @@
{
+ "test.test[ExtractClusters]": [
+ {
+ "uri": "file://test.test_ExtractClusters_/results.txt"
+ }
+ ],
+ "test.test[ExtractFuncs]": [
+ {
+ "uri": "file://test.test_ExtractFuncs_/results.txt"
+ }
+ ],
+ "test.test[ExtractInFuncs]": [
+ {
+ "uri": "file://test.test_ExtractInFuncs_/results.txt"
+ }
+ ],
"test.test[ObfuscateColumn]": [
{
"uri": "file://test.test_ObfuscateColumn_/results.txt"
diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt
new file mode 100644
index 00000000000..5dcf7acfcbf
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt
@@ -0,0 +1,55 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "OptionalType";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ [
+ [
+ "USE";
+ "foo";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt
new file mode 100644
index 00000000000..0880c585111
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt
@@ -0,0 +1,104 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "q";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "select f()";
+ [
+ [
+ [
+ "FUNC";
+ "f";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select f::g()";
+ [
+ [
+ [
+ "MODULE";
+ "f";
+ "1"
+ ];
+ [
+ "MODULE_FUNC";
+ "f::g";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select javascript::y()";
+ [
+ [
+ [
+ "MODULE";
+ "javascript";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select python3::x()";
+ [
+ [
+ [
+ "MODULE";
+ "python3";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt
new file mode 100644
index 00000000000..cc41d96bec2
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt
@@ -0,0 +1,104 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "q";
+ [
+ "DataType";
+ "String"
+ ]
+ ];
+ [
+ "column1";
+ [
+ "OptionalType";
+ [
+ "ListType";
+ [
+ "TupleType";
+ [
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "String"
+ ];
+ [
+ "DataType";
+ "Uint64"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ "select 1 in f()";
+ [
+ [
+ [
+ "FUNC";
+ "f";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select 1 in f::g()";
+ [
+ [
+ [
+ "MODULE";
+ "f";
+ "1"
+ ];
+ [
+ "MODULE_FUNC";
+ "f::g";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select 1 in javascript::y()";
+ [
+ [
+ [
+ "MODULE";
+ "javascript";
+ "1"
+ ]
+ ]
+ ]
+ ];
+ [
+ "select 1 in python3::x()";
+ [
+ [
+ [
+ "MODULE";
+ "python3";
+ "1"
+ ]
+ ]
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql
new file mode 100644
index 00000000000..57a25c08273
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql
@@ -0,0 +1,2 @@
+SELECT
+ ListFilter(YqlLang::RuleFreq("use foo"),($x)->($x.0 == "USE"))
diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql
new file mode 100644
index 00000000000..cfef5d8e4e5
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql
@@ -0,0 +1,9 @@
+SELECT
+ q,ListSort(ListFilter(YqlLang::RuleFreq(q),($x)->($x.0 in ("FUNC","MODULE","MODULE_FUNC"))))
+FROM (VALUES
+ ("select f()"),
+ ("select f::g()"),
+ ("select python3::x()"),
+ ("select javascript::y()")
+) AS a(q)
+order by q
diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql
new file mode 100644
index 00000000000..cc47fe15cd7
--- /dev/null
+++ b/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql
@@ -0,0 +1,9 @@
+SELECT
+ q,ListSort(ListFilter(YqlLang::RuleFreq(q),($x)->($x.0 in ("FUNC","MODULE","MODULE_FUNC"))))
+FROM (VALUES
+ ("select 1 in f()"),
+ ("select 1 in f::g()"),
+ ("select 1 in python3::x()"),
+ ("select 1 in javascript::y()")
+) AS a(q)
+order by q
diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
index ab1090a1089..416284e4334 100644
--- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp
+++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
@@ -37,10 +37,12 @@ public:
const NProtoBuf::Descriptor* descr = msg.GetDescriptor();
if (descr == TToken::GetDescriptor()) {
return;
- }
-
- if (descr == TRule_use_stmt::GetDescriptor()) {
+ } else if (descr == TRule_use_stmt::GetDescriptor()) {
VisitUseStmt(dynamic_cast<const TRule_use_stmt&>(msg));
+ } else if (descr == TRule_unary_casual_subexpr::GetDescriptor()) {
+ VisitUnaryCasualSubexpr(dynamic_cast<const TRule_unary_casual_subexpr&>(msg));
+ } else if (descr == TRule_in_unary_casual_subexpr::GetDescriptor()) {
+ VisitUnaryCasualSubexpr(dynamic_cast<const TRule_in_unary_casual_subexpr&>(msg));
}
TStringBuf fullName = descr->full_name();
@@ -81,6 +83,111 @@ private:
}
}
+    // Records function-usage statistics for a unary_casual_subexpr or
+    // in_unary_casual_subexpr node.
+    //
+    // Block1 alternative 1 (plain identifier): counted under "FUNC", but only
+    // when the first suffix element is an invoke_expr.
+    // Block1 alternative 2 (atom expression): when ParseUdf accepts it, the
+    // module is counted under "MODULE"; the qualified "module::func" name is
+    // additionally counted under "MODULE_FUNC" unless the lower-cased module
+    // name contains "javascript" or "python".
+    template<typename TUnaryCasualExprRule>
+    void VisitUnaryCasualSubexpr(const TUnaryCasualExprRule& msg) {
+        // The two supported rules differ only in the names of their generated accessors.
+        constexpr bool isPlainRule = std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>;
+        using THead = typename TUnaryCasualExprRule::TBlock1;
+        using TSuffixItem = TRule_unary_subexpr_suffix::TBlock1::TBlock1;
+
+        const auto& head = msg.GetBlock1();
+        TString module;
+        TString func;
+        switch (head.Alt_case()) {
+            case THead::kAlt1: {
+                const auto& idAlt = head.GetAlt1();
+                if constexpr (isPlainRule) {
+                    func = Id(idAlt.GetRule_id_expr1(), Translation);
+                } else {
+                    func = Id(idAlt.GetRule_id_expr_in1(), Translation);
+                }
+                break;
+            }
+            case THead::kAlt2: {
+                const auto& atomAlt = head.GetAlt2();
+                bool isUdf = false;
+                if constexpr (isPlainRule) {
+                    isUdf = ParseUdf(atomAlt.GetRule_atom_expr1(), module, func);
+                } else {
+                    isUdf = ParseUdf(atomAlt.GetRule_in_atom_expr1(), module, func);
+                }
+                if (!isUdf) {
+                    return;
+                }
+
+                Freqs[std::make_pair("MODULE", module)] += 1;
+                const auto moduleLower = to_lower(module);
+                const bool isJsOrPython = moduleLower.Contains("javascript") || moduleLower.Contains("python");
+                if (!isJsOrPython) {
+                    Freqs[std::make_pair("MODULE_FUNC", module + "::" + func)] += 1;
+                }
+                return;
+            }
+            case THead::ALT_NOT_SET:
+                Y_ABORT("You should change implementation according to grammar changes");
+        }
+
+        // Every known suffix alternative returns, so only the first suffix
+        // element decides whether the identifier is being invoked.
+        const auto& suffixItems = msg.GetRule_unary_subexpr_suffix2().GetBlock1();
+        for (const auto& item : suffixItems) {
+            switch (item.GetBlock1().Alt_case()) {
+                case TSuffixItem::kAlt2:
+                    // invoke_expr
+                    Freqs[std::make_pair("FUNC", func)] += 1;
+                    return;
+                case TSuffixItem::kAlt1: // key_expr
+                case TSuffixItem::kAlt3: // dot
+                    return;
+                case TSuffixItem::ALT_NOT_SET:
+                    Y_ABORT("You should change implementation according to grammar changes");
+            }
+        }
+    }
+
+    // Extracts the module and function names from an atom_expr that is a
+    // namespace-qualified name (alternative 7 of the rule).
+    //
+    // Returns true and fills both `module` and `func` when the part after the
+    // module is an identifier (Block3 alternative 1). Returns false when msg is
+    // some other atom_expr alternative, or when Block3 holds alternative 2
+    // (presumably a string-literal name - confirm against the grammar).
+    // Note: `module` is assigned before Block3 is examined, so it may have
+    // been overwritten even when false is returned.
+    bool ParseUdf(const TRule_atom_expr& msg, TString& module, TString& func) {
+        if (msg.Alt_case() != TRule_atom_expr::kAltAtomExpr7) {
+            return false;
+        }
+
+        const auto& alt = msg.GetAlt_atom_expr7();
+        module = Id(alt.GetRule_an_id_or_type1(), Translation);
+        switch (alt.GetBlock3().Alt_case()) {
+            case TRule_atom_expr::TAlt7::TBlock3::kAlt1:
+                func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation);
+                break;
+            case TRule_atom_expr::TAlt7::TBlock3::kAlt2: {
+                return false;
+            }
+            case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET:
+                // Same wording as the other grammar-drift aborts in this file;
+                // the former "Unsigned number:" prefix did not relate to this code.
+                Y_ABORT("You should change implementation according to grammar changes");
+        }
+
+        return true;
+    }
+
+    // Extracts the module and function names from an in_atom_expr that is a
+    // namespace-qualified name (alternative 6 of the rule).
+    //
+    // Returns true and fills both `module` and `func` when Block3 holds
+    // alternative 1 (an identifier). Returns false for any other in_atom_expr
+    // alternative or when Block3 holds alternative 2. `module` is assigned
+    // before Block3 is examined, so it may be overwritten even on failure.
+    bool ParseUdf(const TRule_in_atom_expr& msg, TString& module, TString& func) {
+        const bool isQualifiedName = msg.Alt_case() == TRule_in_atom_expr::kAltInAtomExpr6;
+        if (!isQualifiedName) {
+            return false;
+        }
+
+        using TNameBlock = TRule_in_atom_expr::TAlt6::TBlock3;
+        const auto& qualified = msg.GetAlt_in_atom_expr6();
+        const auto& nameBlock = qualified.GetBlock3();
+        module = Id(qualified.GetRule_an_id_or_type1(), Translation);
+        switch (nameBlock.Alt_case()) {
+            case TNameBlock::kAlt2:
+                return false;
+            case TNameBlock::kAlt1:
+                func = Id(nameBlock.GetAlt1().GetRule_id_or_type1(), Translation);
+                break;
+            case TNameBlock::ALT_NOT_SET:
+                Y_ABORT("You should change implementation according to grammar changes");
+        }
+
+        return true;
+    }
+
void VisitAllFields(const NProtoBuf::Message& msg, const NProtoBuf::Descriptor* descr) {
for (int i = 0; i < descr->field_count(); ++i) {
const NProtoBuf::FieldDescriptor* fd = descr->field(i);
@@ -182,3 +289,4 @@ SIMPLE_MODULE(TYqlLangModule,
);
REGISTER_MODULES(TYqlLangModule);
+