diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-26 15:13:45 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2025-02-26 15:26:42 +0300 |
commit | 5b32975d31ed07a20d7799a128befa1b04991bd5 (patch) | |
tree | eec671e9b5dd644fdd3ffe11ba9ce6315d8b783c | |
parent | 3364836723dd075c997fb496d86e02d64c626533 (diff) | |
download | ydb-5b32975d31ed07a20d7799a128befa1b04991bd5.tar.gz |
Intermediate changes
commit_hash:3acdd39aebcbbdab2ce4ac95a183701d25b9fe43
-rw-r--r-- | yql/essentials/udfs/language/yql/ya.make | 3 | ||||
-rw-r--r-- | yql/essentials/udfs/language/yql/yql_language_udf.cpp | 112 |
2 files changed, 113 insertions, 2 deletions
```diff
diff --git a/yql/essentials/udfs/language/yql/ya.make b/yql/essentials/udfs/language/yql/ya.make
index 90fe8dfbab..5167463ba4 100644
--- a/yql/essentials/udfs/language/yql/ya.make
+++ b/yql/essentials/udfs/language/yql/ya.make
@@ -2,7 +2,7 @@ YQL_UDF(yql_language_udf)
 
 YQL_ABI_VERSION(
     2
-    22
+    39
     0
 )
 
@@ -21,6 +21,7 @@ PEERDIR(
     yql/essentials/sql/v1/proto_parser/antlr4_ansi
     yql/essentials/sql/pg_dummy
     yql/essentials/sql/v1/format
+    library/cpp/protobuf/util
 )
 
 END()
diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
index ea5c55760b..356ecfade3 100644
--- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp
+++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp
@@ -2,14 +2,70 @@
 
 #include <yql/essentials/sql/v1/lexer/antlr4/lexer.h>
 #include <yql/essentials/sql/v1/lexer/antlr4_ansi/lexer.h>
+#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
 #include <yql/essentials/sql/v1/proto_parser/antlr4/proto_parser.h>
 #include <yql/essentials/sql/v1/proto_parser/antlr4_ansi/proto_parser.h>
 #include <yql/essentials/sql/v1/format/sql_format.h>
+#include <yql/essentials/sql/settings/translation_settings.h>
+#include <library/cpp/protobuf/util/simple_reflection.h>
 
 using namespace NYql;
 using namespace NKikimr::NUdf;
 using namespace NSQLTranslation;
 
+class TRuleFreqVisitor {
+public:
+    TRuleFreqVisitor() {
+    }
+
+    void Visit(const NProtoBuf::Message& msg) {
+        const NProtoBuf::Descriptor* descr = msg.GetDescriptor();
+        if (descr->name() == "TToken") {
+            return;
+        }
+
+        TStringBuf fullName = descr->full_name();
+        fullName.SkipPrefix("NSQLv1Generated.");
+        for (int i = 0; i < descr->field_count(); ++i) {
+            const NProtoBuf::FieldDescriptor* fd = descr->field(i);
+            NProtoBuf::TConstField field(msg, fd);
+            if (!field.HasValue()) {
+                continue;
+            }
+
+            TStringBuf fieldFullName = fd->full_name();
+            fieldFullName.SkipPrefix("NSQLv1Generated.");
+            if (fieldFullName.EndsWith(".Descr")) {
+                continue;
+            }
+
+
+            Freqs[std::make_pair(fullName, fieldFullName)] += 1;
+        }
+
+        VisitAllFields(msg, descr);
+    }
+
+    const THashMap<std::pair<TString, TString>, ui64>& GetFreqs() const {
+        return Freqs;
+    }
+
+private:
+    void VisitAllFields(const NProtoBuf::Message& msg, const NProtoBuf::Descriptor* descr) {
+        for (int i = 0; i < descr->field_count(); ++i) {
+            const NProtoBuf::FieldDescriptor* fd = descr->field(i);
+            NProtoBuf::TConstField field(msg, fd);
+            if (field.IsMessage()) {
+                for (size_t j = 0; j < field.Size(); ++j) {
+                    Visit(*field.Get<NProtoBuf::Message>(j));
+                }
+            }
+        }
+    }
+
+    THashMap<std::pair<TString, TString>, ui64> Freqs;
+};
+
 SIMPLE_UDF(TObfuscate, TOptional<char*>(TAutoMap<char*>)) {
     using namespace NSQLFormat;
     try {
@@ -35,8 +91,62 @@ SIMPLE_UDF(TObfuscate, TOptional<char*>(TAutoMap<char*>)) {
     }
 }
 
+using TRuleFreqResult = TListType<TTuple<char*, char*, ui64>>;
+
+SIMPLE_UDF(TRuleFreq, TOptional<TRuleFreqResult>(TAutoMap<char*>)) {
+    try {
+        const TString query(args[0].AsStringRef());
+        NYql::TIssues issues;
+        google::protobuf::Arena arena;
+        NSQLTranslation::TTranslationSettings settings;
+        settings.Arena = &arena;
+        if (!ParseTranslationSettings(query, settings, issues)) {
+            return {};
+        }
+
+        NSQLTranslationV1::TLexers lexers;
+        lexers.Antlr4 = NSQLTranslationV1::MakeAntlr4LexerFactory();
+        lexers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiLexerFactory();
+        auto lexer = NSQLTranslationV1::MakeLexer(lexers, settings.AnsiLexer, true);
+        auto onNextToken = [&](NSQLTranslation::TParsedToken&& token) {
+            Y_UNUSED(token);
+        };
+
+        if (!lexer->Tokenize(query, "", onNextToken, issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS)) {
+            return {};
+        }
+
+        NSQLTranslationV1::TParsers parsers;
+        parsers.Antlr4 = NSQLTranslationV1::MakeAntlr4ParserFactory();
+        parsers.Antlr4Ansi = NSQLTranslationV1::MakeAntlr4AnsiParserFactory();
+        auto msg = NSQLTranslationV1::SqlAST(parsers, query, "", issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS,
+            settings.AnsiLexer, true, &arena);
+        if (!msg) {
+            return {};
+        }
+
+        TRuleFreqVisitor visitor;
+        visitor.Visit(*msg);
+
+        auto listBuilder = valueBuilder->NewListBuilder();
+        for (const auto& [key, f] : visitor.GetFreqs()) {
+            TUnboxedValue* items;
+            auto tuple = valueBuilder->NewArray(3, items);
+            items[0] = valueBuilder->NewString(key.first);
+            items[1] = valueBuilder->NewString(key.second);
+            items[2] = TUnboxedValuePod(f);
+            listBuilder->Add(std::move(tuple));
+        }
+
+        return listBuilder->Build();
+    } catch (const yexception&) {
+        return {};
+    }
+}
+
 SIMPLE_MODULE(TYqlLangModule,
-    TObfuscate
+    TObfuscate,
+    TRuleFreq
 );
 
 REGISTER_MODULES(TYqlLangModule);
```