diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-03-05 08:40:22 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-03-05 08:53:10 +0300 |
commit | 78e8655f82daee0ed66a7018c578039f834a39ef (patch) | |
tree | 175156e9c67fc68f9649df4da2f14a2afb6c7be1 | |
parent | e461a7c94c01378bb9578550ab81ef03e340d89d (diff) | |
download | ydb-78e8655f82daee0ed66a7018c578039f834a39ef.tar.gz |
Intermediate changes
commit_hash:db50ded25b1584b755040ad43d8d964fe43cd87a
8 files changed, 409 insertions, 3 deletions
diff --git a/yql/essentials/udfs/language/yql/test/canondata/result.json b/yql/essentials/udfs/language/yql/test/canondata/result.json index ad20b6aaa14..838e46edd2a 100644 --- a/yql/essentials/udfs/language/yql/test/canondata/result.json +++ b/yql/essentials/udfs/language/yql/test/canondata/result.json @@ -1,4 +1,19 @@ { + "test.test[ExtractClusters]": [ + { + "uri": "file://test.test_ExtractClusters_/results.txt" + } + ], + "test.test[ExtractFuncs]": [ + { + "uri": "file://test.test_ExtractFuncs_/results.txt" + } + ], + "test.test[ExtractInFuncs]": [ + { + "uri": "file://test.test_ExtractInFuncs_/results.txt" + } + ], "test.test[ObfuscateColumn]": [ { "uri": "file://test.test_ObfuscateColumn_/results.txt" diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt new file mode 100644 index 00000000000..5dcf7acfcbf --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractClusters_/results.txt @@ -0,0 +1,55 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "OptionalType"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + [ + [ + "USE"; + "foo"; + "1" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt new file mode 100644 index 00000000000..0880c585111 --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractFuncs_/results.txt @@ -0,0 +1,104 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "q"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "select f()"; + [ + [ + [ + "FUNC"; + "f"; + "1" + ] + ] + ] + ]; + [ + "select f::g()"; + [ + [ + [ + "MODULE"; + "f"; + "1" + ]; + [ + "MODULE_FUNC"; + "f::g"; + "1" + ] + ] + ] + ]; + [ + "select javascript::y()"; + [ + [ + [ + "MODULE"; + "javascript"; + "1" + ] + ] + ] + ]; + [ + "select python3::x()"; + [ + [ + [ + "MODULE"; + "python3"; + "1" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt new file mode 100644 index 00000000000..cc41d96bec2 --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/canondata/test.test_ExtractInFuncs_/results.txt @@ -0,0 +1,104 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "q"; + [ + "DataType"; + "String" + ] + ]; + [ + "column1"; + [ + "OptionalType"; + [ + "ListType"; + [ + "TupleType"; + [ + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "String" + ]; + [ + "DataType"; + "Uint64" + ] + ] + ] + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + "select 1 in f()"; + [ + [ + [ + "FUNC"; + "f"; + "1" + ] + ] + ] + ]; + [ + "select 1 in f::g()"; + [ + [ + [ + "MODULE"; + "f"; + "1" + ]; + [ + "MODULE_FUNC"; + "f::g"; + "1" + ] + ] + ] + ]; + [ + "select 1 in javascript::y()"; + [ + [ + [ + "MODULE"; + "javascript"; + "1" + ] + ] + ] + ]; + [ + "select 1 in python3::x()"; + [ + [ + [ + "MODULE"; + "python3"; + "1" + ] + ] + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql new file mode 100644 index 00000000000..57a25c08273 --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/cases/ExtractClusters.sql @@ -0,0 +1,2 @@ +SELECT + ListFilter(YqlLang::RuleFreq("use foo"),($x)->($x.0 == "USE")) diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql new file mode 100644 index 00000000000..cfef5d8e4e5 --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/cases/ExtractFuncs.sql @@ -0,0 +1,9 @@ +SELECT + q,ListSort(ListFilter(YqlLang::RuleFreq(q),($x)->($x.0 in ("FUNC","MODULE","MODULE_FUNC")))) +FROM (VALUES + ("select f()"), + ("select f::g()"), + ("select python3::x()"), + ("select javascript::y()") +) AS a(q) +order by q diff --git a/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql b/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql new file mode 100644 index 00000000000..cc47fe15cd7 --- /dev/null +++ b/yql/essentials/udfs/language/yql/test/cases/ExtractInFuncs.sql @@ -0,0 +1,9 @@ +SELECT + q,ListSort(ListFilter(YqlLang::RuleFreq(q),($x)->($x.0 in ("FUNC","MODULE","MODULE_FUNC")))) +FROM (VALUES + ("select 1 in f()"), + ("select 1 in f::g()"), + ("select 1 in python3::x()"), + ("select 1 in javascript::y()") +) AS a(q) +order by q diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp index ab1090a1089..416284e4334 100644 --- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp +++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp @@ -37,10 +37,12 @@ public: const NProtoBuf::Descriptor* descr = msg.GetDescriptor(); if (descr == TToken::GetDescriptor()) { return; - } - - if (descr == TRule_use_stmt::GetDescriptor()) { + } else if (descr == TRule_use_stmt::GetDescriptor()) { VisitUseStmt(dynamic_cast<const TRule_use_stmt&>(msg)); + } else if (descr == TRule_unary_casual_subexpr::GetDescriptor()) { + VisitUnaryCasualSubexpr(dynamic_cast<const TRule_unary_casual_subexpr&>(msg)); + } else if (descr == TRule_in_unary_casual_subexpr::GetDescriptor()) { + VisitUnaryCasualSubexpr(dynamic_cast<const TRule_in_unary_casual_subexpr&>(msg)); } TStringBuf fullName = descr->full_name(); @@ -81,6 +83,111 @@ private: } } + template<typename TUnaryCasualExprRule> + void VisitUnaryCasualSubexpr(const TUnaryCasualExprRule& msg) { + const auto& block = msg.GetBlock1(); + TString func; + TString module; + switch (block.Alt_case()) { + case TUnaryCasualExprRule::TBlock1::kAlt1: { + const auto& alt = block.GetAlt1(); + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + func = Id(alt.GetRule_id_expr1(), Translation); + } else { + func = Id(alt.GetRule_id_expr_in1(), Translation); + } + break; + } + case TUnaryCasualExprRule::TBlock1::kAlt2: { + auto& alt = block.GetAlt2(); + if constexpr (std::is_same_v<TUnaryCasualExprRule, TRule_unary_casual_subexpr>) { + if (!ParseUdf(alt.GetRule_atom_expr1(), module, func)) { + return; + } + } else { + if (!ParseUdf(alt.GetRule_in_atom_expr1(), module, func)) { + return; + } + } + + Freqs[std::make_pair("MODULE", module)] += 1; + auto lowerModule = to_lower(module); + if (lowerModule.Contains("javascript") || lowerModule.Contains("python")) { + return; + } + + Freqs[std::make_pair("MODULE_FUNC", module + "::" + func)] += 1; + return; + } + case TUnaryCasualExprRule::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + const auto& suffix = msg.GetRule_unary_subexpr_suffix2(); + for (auto& _b : suffix.GetBlock1()) { + const auto& b = _b.GetBlock1(); + switch (b.Alt_case()) { + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt1: { + // key_expr + return; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt2: { + // invoke_expr + Freqs[std::make_pair("FUNC", func)] += 1; + return; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::kAlt3: { + // dot + return; + } + case TRule_unary_subexpr_suffix::TBlock1::TBlock1::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + } + } + + bool ParseUdf(const TRule_atom_expr& msg, TString& module, TString& func) { + if (msg.Alt_case() != TRule_atom_expr::kAltAtomExpr7) { + return false; + } + + const auto& alt = msg.GetAlt_atom_expr7(); + module = Id(alt.GetRule_an_id_or_type1(), Translation); + switch (alt.GetBlock3().Alt_case()) { + case TRule_atom_expr::TAlt7::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation); + break; + case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { + return false; + } + case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: + Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); + } + + return true; + } + + bool ParseUdf(const TRule_in_atom_expr& msg, TString& module, TString& func) { + if (msg.Alt_case() != TRule_in_atom_expr::kAltInAtomExpr6) { + return false; + } + + const auto& alt = msg.GetAlt_in_atom_expr6(); + module = Id(alt.GetRule_an_id_or_type1(), Translation); + switch (alt.GetBlock3().Alt_case()) { + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation); + break; + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { + return false; + } + case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); + } + + return true; + } + void VisitAllFields(const NProtoBuf::Message& msg, const NProtoBuf::Descriptor* descr) { for (int i = 0; i < descr->field_count(); ++i) { const NProtoBuf::FieldDescriptor* fd = descr->field(i); @@ -182,3 +289,4 @@ SIMPLE_MODULE(TYqlLangModule, ); REGISTER_MODULES(TYqlLangModule); + |