diff options
author | vvvv <vvvv@yandex-team.com> | 2025-05-28 16:58:57 +0300 |
---|---|---|
committer | vvvv <vvvv@yandex-team.com> | 2025-05-28 17:22:31 +0300 |
commit | afbcef3f55df5bc1054cd00a11f39cab08f83095 (patch) | |
tree | ca16a478cfe58948eb9ac12e54147100af9ddf37 | |
parent | d84972b06e6d5e001155d1be72189c6a2ef7e7ac (diff) | |
download | ydb-afbcef3f55df5bc1054cd00a11f39cab08f83095.tar.gz |
fix re2 langver
fix
YQL-18878: Provide regex error from 2025.03 version
commit_hash:fdaeba4009bfbedccd39942909bcd303ae4ae932
14 files changed, 49 insertions, 15 deletions
diff --git a/yql/essentials/minikql/computation/mkql_computation_node.cpp b/yql/essentials/minikql/computation/mkql_computation_node.cpp index 7fa810fb8cd..6625d3a6eef 100644 --- a/yql/essentials/minikql/computation/mkql_computation_node.cpp +++ b/yql/essentials/minikql/computation/mkql_computation_node.cpp @@ -60,6 +60,7 @@ TComputationContext::TComputationContext(const THolderFactory& holderFactory, , CountersProvider(opts.CountersProvider) , SecureParamsProvider(opts.SecureParamsProvider) , LogProvider(opts.LogProvider) + , LangVer(opts.LangVer) { std::fill_n(MutableValues.get(), mutables.CurValueIndex, NUdf::TUnboxedValue(NUdf::TUnboxedValuePod::Invalid())); diff --git a/yql/essentials/tools/udf_resolver/udf_resolver.cpp b/yql/essentials/tools/udf_resolver/udf_resolver.cpp index 03116858f7e..9db127d0864 100644 --- a/yql/essentials/tools/udf_resolver/udf_resolver.cpp +++ b/yql/essentials/tools/udf_resolver/udf_resolver.cpp @@ -194,6 +194,8 @@ void ResolveUDFs() { udfRes->SetSupportsBlocks(funcInfo.SupportsBlocks); udfRes->SetIsStrict(funcInfo.IsStrict); + udfRes->SetMinLangVer(funcInfo.MinLangVer); + udfRes->SetMaxLangVer(funcInfo.MaxLangVer); } catch (yexception& e) { udfRes->SetError(TStringBuilder() << "Internal error was found when udf metadata is loading for function: " << udf.GetName() diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp index 6d499725901..55c110b3458 100644 --- a/yql/essentials/udfs/common/re2/re2_udf.cpp +++ b/yql/essentials/udfs/common/re2/re2_udf.cpp @@ -1,3 +1,4 @@ +#include <yql/essentials/public/langver/yql_langver.h> #include <yql/essentials/public/udf/udf_helpers.h> #include <yql/essentials/public/udf/udf_type_ops.h> #include <yql/essentials/public/udf/udf_value_builder.h> @@ -49,7 +50,10 @@ namespace { return result; } - bool ShouldFailOnInvalidRegexp(const std::string_view regexp) { + bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { + if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { + return true; + } THashType hash = GetStringHash(regexp) % 100; ui64 failProbability = GetFailProbability(); return hash < failProbability; @@ -101,11 +105,13 @@ namespace { EMode mode, const TOptionsSchema& optionsSchema, TSourcePosition pos, + NYql::TLangVersion currentlangVersion, const TRegexpGroups& regexpGroups = TRegexpGroups()) : Mode(mode) , OptionsSchema(optionsSchema) , Pos_(pos) , RegexpGroups(regexpGroups) + , CurrentLangVersion(currentlangVersion) { } @@ -121,13 +127,15 @@ namespace { Mode, posix, OptionsSchema, - Pos_)); + Pos_, + CurrentLangVersion)); } EMode Mode; const TOptionsSchema OptionsSchema; TSourcePosition Pos_; const TRegexpGroups RegexpGroups; + NYql::TLangVersion CurrentLangVersion; }; static const TStringRef& Name(EMode mode) { @@ -162,13 +170,14 @@ namespace { EMode mode, bool posix, const TOptionsSchema& optionsSchema, - TSourcePosition pos) + TSourcePosition pos, + NYql::TLangVersion currentLangVersion) : RegexpGroups(regexpGroups) , Mode(mode) , Captured() , OptionsSchema(optionsSchema) , Pos_(pos) - { + , CurrentLangVersion(currentLangVersion) { try { auto patternValue = runConfig.GetElement(0); auto optionsValue = runConfig.GetElement(1); @@ -191,7 +200,7 @@ namespace { Regexp = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); - if (!Regexp->ok() && ShouldFailOnInvalidRegexp(pattern)) { + if (!Regexp->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion)) { throw yexception() << FormatRegexpError(*Regexp); } @@ -289,6 +298,7 @@ namespace { std::unique_ptr<StringPiece[]> Captured; const TOptionsSchema OptionsSchema; TSourcePosition Pos_; + NYql::TLangVersion CurrentLangVersion; TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { TUnboxedValue* items = nullptr; @@ -488,7 +498,7 @@ namespace { if (!typesOnly) { const auto mode = isMatch ? TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; - builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition())); + builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); } } else if (isCapture) { TRegexpGroups groups; @@ -529,7 +539,7 @@ namespace { if (!typesOnly) { builder.Implementation( - new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), groups)); + new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups)); } } else { @@ -541,20 +551,20 @@ namespace { .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition())); + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); } } else if (isCount) { builder.SimpleSignature<ui32(TOptional<char*>)>() .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition())); + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); } } else if (isFindAndConsume) { builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition())); + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); } } else if (!( TEscape::DeclareSignature(name, userType, builder, typesOnly) || diff --git a/yql/essentials/udfs/common/re2/test/canondata/result.json b/yql/essentials/udfs/common/re2/test/canondata/result.json index 84e161d9283..75db00ebaf8 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/result.json +++ b/yql/essentials/udfs/common/re2/test/canondata/result.json @@ -29,14 +29,19 @@ "uri": "file://test.test_InvalidCaptureRegexFail_/extracted" } ], - "test.test[InvalidRegexFail]": [ + "test.test[InvalidRegexFail_2025.02]": [ { - "uri": "file://test.test_InvalidRegexFail_/extracted" + "uri": "file://test.test_InvalidRegexFail_2025.02_/extracted" } ], - "test.test[InvalidRegexSuccess]": [ + "test.test[InvalidRegexFail_2025.03]": [ { - "uri": "file://test.test_InvalidRegexSuccess_/results.txt" + "uri": "file://test.test_InvalidRegexFail_2025.03_/extracted" + } + ], + "test.test[InvalidRegexSuccess_2025.02]": [ + { + "uri": "file://test.test_InvalidRegexSuccess_2025.02_/results.txt" } ], "test.test[LikeEscape]": [ diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.02_/extracted index f1fda30e02a..f1fda30e02a 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_/extracted +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.02_/extracted diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted new file mode 100644 index 00000000000..f1fda30e02a --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + + <tmp_path>/program.sql:<main>:8:1: Error: Execution of node: Result + SELECT $invalidRe("abaa"); + ^ + <tmp_path>/program.sql:<main>:4:19: Error: Regexp compilation failed. Regexp: "[". Original error is: "missing ]: [" + $invalidRe = Re2::FindAndConsume("["); + ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_2025.02_/results.txt index c37c844a342..c37c844a342 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_/results.txt +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_2025.02_/results.txt diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.cfg index 691aa9d9b6f..c0ef97affef 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.cfg +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.cfg @@ -1,2 +1,3 @@ xfail env YQL_RE2_REGEXP_PROBABILITY_FAIL 100 +langver 2025.02 diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.sql index 087dcab2efd..087dcab2efd 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.sql +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.sql diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg new file mode 100644 index 00000000000..e37bb6b41a2 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg @@ -0,0 +1,2 @@ +langver 2025.03 +xfail diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.sql index 087dcab2efd..087dcab2efd 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess.sql +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.sql diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg new file mode 100644 index 00000000000..367bc6a9ec0 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg @@ -0,0 +1 @@ +langver 2025.02 diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql new file mode 100644 index 00000000000..087dcab2efd --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +$invalidRe = Re2::FindAndConsume("["); + +SELECT $invalidRe("abaa"); diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make index 7e554133486..ca8be7370ba 100644 --- a/yql/essentials/udfs/common/re2/ya.make +++ b/yql/essentials/udfs/common/re2/ya.make @@ -2,7 +2,7 @@ YQL_UDF_CONTRIB(re2_udf) YQL_ABI_VERSION( 2 - 28 + 43 0 ) |