about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author robot-piglet <robot-piglet@yandex-team.com> 2025-02-26 15:13:45 +0300
committer robot-piglet <robot-piglet@yandex-team.com> 2025-02-26 15:26:42 +0300
commit 5b32975d31ed07a20d7799a128befa1b04991bd5 (patch)
tree eec671e9b5dd644fdd3ffe11ba9ce6315d8b783c
parent 3364836723dd075c997fb496d86e02d64c626533 (diff)
download ydb-5b32975d31ed07a20d7799a128befa1b04991bd5.tar.gz
Intermediate changes
commit_hash:3acdd39aebcbbdab2ce4ac95a183701d25b9fe43
-rw-r--r-- yql/essentials/udfs/language/yql/ya.make 3
-rw-r--r-- yql/essentials/udfs/language/yql/yql_language_udf.cpp 112
2 files changed, 113 insertions, 2 deletions
diff --git a/yql/essentials/udfs/language/yql/ya.make b/yql/essentials/udfs/language/yql/ya.make
index 90fe8dfbab..5167463ba4 100644
--- a/yql/essentials/udfs/language/yql/ya.make
+++ b/yql/essentials/udfs/language/yql/ya.make
@@ -2,7 +2,7 @@ YQL_UDF(yql_language_udf)
YQL_ABI_VERSION(
2
- 22
+ 39
0
)
@@ -21,6 +21,7 @@ PEERDIR(
yql/essentials/sql/v1/proto_parser/antlr4_ansi
yql/essentials/sql/pg_dummy
yql/essentials/sql/v1/format
+ library/cpp/protobuf/util
)
END()
diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
index ea5c55760b..356ecfade3 100644
--- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp
+++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
@@ -2,14 +2,70 @@
#include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
#include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
#include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
#include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
#include <yql/essentials/sql/v1/format/sql_format.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+#include <library/cpp/protobuf/util/simple_reflection.h>
using namespace NYql;
using namespace NKikimr::NUdf;
using namespace NSQLTranslation;
+// Walks a protobuf message tree and tallies, for every (message, field) pair,
+// how many visited messages had that field populated.
+class TRuleFreqVisitor {
+public:
+    TRuleFreqVisitor() = default;
+
+    // Counts the populated fields of `msg`, then recurses into each of its
+    // message-typed fields. A message whose descriptor name is "TToken" is
+    // skipped together with everything beneath it.
+    void Visit(const NProtoBuf::Message& msg) {
+        const NProtoBuf::Descriptor* type = msg.GetDescriptor();
+        if (type->name() == "TToken") {
+            return;
+        }
+
+        CountPopulatedFields(msg, type);
+        DescendIntoChildren(msg, type);
+    }
+
+    // Accumulated counters keyed by (message full name, field full name),
+    // each with a leading "NSQLv1Generated." removed when present.
+    const THashMap<std::pair<TString, TString>, ui64>& GetFreqs() const {
+        return Freqs;
+    }
+
+private:
+    // Returns `name` without a leading "NSQLv1Generated." (unchanged if absent).
+    static TStringBuf StripGeneratedNamespace(TStringBuf name) {
+        name.SkipPrefix("NSQLv1Generated.");
+        return name;
+    }
+
+    // Adds one to the counter of every field of `msg` that has a value,
+    // except fields whose stripped full name ends with ".Descr".
+    void CountPopulatedFields(const NProtoBuf::Message& msg, const NProtoBuf::Descriptor* type) {
+        TStringBuf ownerName = StripGeneratedNamespace(type->full_name());
+        for (int idx = 0; idx < type->field_count(); ++idx) {
+            const NProtoBuf::FieldDescriptor* fieldDescr = type->field(idx);
+            NProtoBuf::TConstField accessor(msg, fieldDescr);
+            if (accessor.HasValue()) {
+                TStringBuf fieldName = StripGeneratedNamespace(fieldDescr->full_name());
+                if (!fieldName.EndsWith(".Descr")) {
+                    Freqs[std::make_pair(ownerName, fieldName)] += 1;
+                }
+            }
+        }
+    }
+
+    // Calls Visit() on every element of every message-typed field of `msg`.
+    void DescendIntoChildren(const NProtoBuf::Message& msg, const NProtoBuf::Descriptor* type) {
+        for (int idx = 0; idx < type->field_count(); ++idx) {
+            NProtoBuf::TConstField accessor(msg, type->field(idx));
+            if (!accessor.IsMessage()) {
+                continue;
+            }
+
+            for (size_t pos = 0; pos < accessor.Size(); ++pos) {
+                Visit(*accessor.Get<NProtoBuf::Message>(pos));
+            }
+        }
+    }
+
+    THashMap<std::pair<TString, TString>, ui64> Freqs;
+};
+
+
SIMPLE_UDF(TObfuscate, TOptional<char*>(TAutoMap<char*>)) {
using namespace NSQLFormat;
try {
@@ -35,8 +91,62 @@ SIMPLE_UDF(TObfuscate, TOptional<char*>(TAutoMap<char*>)) {
}
}
+using TRuleFreqResult = TListType<TTuple<char*, char*, ui64>>;
+
+// UDF TRuleFreq: parses the query passed as the first argument and returns a
+// list of (message name, field name, count) tuples gathered by
+// TRuleFreqVisitor over the resulting parse tree. Yields an empty value when
+// settings parsing, tokenization or parsing fails, or when a yexception is
+// thrown along the way.
+SIMPLE_UDF(TRuleFreq, TOptional<TRuleFreqResult>(TAutoMap<char*>)) {
+    try {
+        const TString sql(args[0].AsStringRef());
+        NYql::TIssues problems;
+        // The arena is declared before everything that is handed a pointer to it.
+        google::protobuf::Arena arena;
+
+        NSQLTranslation::TTranslationSettings translation;
+        translation.Arena = &arena;
+        const bool settingsParsed = ParseTranslationSettings(sql, translation, problems);
+        if (!settingsParsed) {
+            return {};
+        }
+
+        // Tokenize first; the tokens themselves are discarded, only the
+        // success flag is used.
+        // NOTE(review): the literal `true` presumably selects the ANTLR4
+        // implementation - confirm against the MakeLexer declaration.
+        NSQLTranslationV1::TLexers lexerFactories;
+        lexerFactories.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+        lexerFactories.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
+        auto tokenizer = NSQLTranslationV1::MakeLexer(lexerFactories, translation.AnsiLexer, true);
+        auto ignoreToken = [](NSQLTranslation::TParsedToken&&) {
+        };
+
+        const bool tokenized = tokenizer->Tokenize(sql, "", ignoreToken, problems,
+            NSQLTranslation::SQL_MAX_PARSER_ERRORS);
+        if (!tokenized) {
+            return {};
+        }
+
+        NSQLTranslationV1::TParsers parserFactories;
+        parserFactories.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
+        parserFactories.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
+        auto ast = NSQLTranslationV1::SqlAST(parserFactories, sql, "", problems,
+            NSQLTranslation::SQL_MAX_PARSER_ERRORS, translation.AnsiLexer, true, &arena);
+        if (!ast) {
+            return {};
+        }
+
+        TRuleFreqVisitor counter;
+        counter.Visit(*ast);
+
+        // Emit one 3-element tuple per counter entry.
+        auto rows = valueBuilder->NewListBuilder();
+        for (const auto& entry : counter.GetFreqs()) {
+            TUnboxedValue* cells;
+            auto row = valueBuilder->NewArray(3, cells);
+            cells[0] = valueBuilder->NewString(entry.first.first);
+            cells[1] = valueBuilder->NewString(entry.first.second);
+            cells[2] = TUnboxedValuePod(entry.second);
+            rows->Add(std::move(row));
+        }
+
+        return rows->Build();
+    } catch (const yexception&) {
+        return {};
+    }
+}
+
+
SIMPLE_MODULE(TYqlLangModule,
- TObfuscate
+ TObfuscate,
+ TRuleFreq
);
REGISTER_MODULES(TYqlLangModule);