diff options
| author | vvvv <[email protected]> | 2025-05-28 16:58:57 +0300 | 
|---|---|---|
| committer | vvvv <[email protected]> | 2025-05-28 17:22:31 +0300 | 
| commit | afbcef3f55df5bc1054cd00a11f39cab08f83095 (patch) | |
| tree | ca16a478cfe58948eb9ac12e54147100af9ddf37 /yql/essentials/udfs | |
| parent | d84972b06e6d5e001155d1be72189c6a2ef7e7ac (diff) | |
fix re2 langver
fix
YQL-18878: Provide regex error from 2025.03 version
commit_hash:fdaeba4009bfbedccd39942909bcd303ae4ae932
Diffstat (limited to 'yql/essentials/udfs')
12 files changed, 46 insertions, 15 deletions
diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp index 6d499725901..55c110b3458 100644 --- a/yql/essentials/udfs/common/re2/re2_udf.cpp +++ b/yql/essentials/udfs/common/re2/re2_udf.cpp @@ -1,3 +1,4 @@ +#include <yql/essentials/public/langver/yql_langver.h>  #include <yql/essentials/public/udf/udf_helpers.h>  #include <yql/essentials/public/udf/udf_type_ops.h>  #include <yql/essentials/public/udf/udf_value_builder.h> @@ -49,7 +50,10 @@ namespace {          return result;      } -    bool ShouldFailOnInvalidRegexp(const std::string_view regexp) { +    bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { +        if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { +            return true; +        }          THashType hash = GetStringHash(regexp) % 100;          ui64 failProbability = GetFailProbability();          return hash < failProbability; @@ -101,11 +105,13 @@ namespace {                  EMode mode,                  const TOptionsSchema& optionsSchema,                  TSourcePosition pos, +                NYql::TLangVersion currentlangVersion,                  const TRegexpGroups& regexpGroups = TRegexpGroups())                  : Mode(mode)                  , OptionsSchema(optionsSchema)                  , Pos_(pos)                  , RegexpGroups(regexpGroups) +                , CurrentLangVersion(currentlangVersion)              {              } @@ -121,13 +127,15 @@ namespace {                          Mode,                          posix,                          OptionsSchema, -                        Pos_)); +                        Pos_, +                        CurrentLangVersion));              }              EMode Mode;              const TOptionsSchema OptionsSchema;              TSourcePosition Pos_;              const TRegexpGroups RegexpGroups; +            NYql::TLangVersion CurrentLangVersion;          };          static const TStringRef& Name(EMode mode) { @@ -162,13 +170,14 @@ namespace {              EMode mode,              bool posix,              const TOptionsSchema& optionsSchema, -            TSourcePosition pos) +            TSourcePosition pos, +            NYql::TLangVersion currentLangVersion)              : RegexpGroups(regexpGroups)              , Mode(mode)              , Captured()              , OptionsSchema(optionsSchema)              , Pos_(pos) -        { +            , CurrentLangVersion(currentLangVersion) {              try {                  auto patternValue = runConfig.GetElement(0);                  auto optionsValue = runConfig.GetElement(1); @@ -191,7 +200,7 @@ namespace {                  Regexp = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); -                if (!Regexp->ok() && ShouldFailOnInvalidRegexp(pattern)) { +                if (!Regexp->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion)) {                      throw yexception() << FormatRegexpError(*Regexp);                  } @@ -289,6 +298,7 @@ namespace {          std::unique_ptr<StringPiece[]> Captured;          const TOptionsSchema OptionsSchema;          TSourcePosition Pos_; +        NYql::TLangVersion CurrentLangVersion;          TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const {              TUnboxedValue* items = nullptr; @@ -488,7 +498,7 @@ namespace {                  if (!typesOnly) {                      const auto mode = isMatch ? TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; -                    builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition())); +                    builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer()));                  }              } else if (isCapture) {                  TRegexpGroups groups; @@ -529,7 +539,7 @@ namespace {                      if (!typesOnly) {                          builder.Implementation( -                            new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), groups)); +                            new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups));                      }                  } else { @@ -541,20 +551,20 @@ namespace {                      .RunConfig(MakeRunConfigType(builder, optOptionsStructType));                  if (!typesOnly) { -                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition())); +                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer()));                  }              } else if (isCount) {                  builder.SimpleSignature<ui32(TOptional<char*>)>()                      .RunConfig(MakeRunConfigType(builder, optOptionsStructType));                  if (!typesOnly) { -                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition())); +                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer()));                  }              } else if (isFindAndConsume) {                  builder.SimpleSignature<TListType<char*>(TOptional<char*>)>()                      .RunConfig(MakeRunConfigType(builder, optOptionsStructType));                  if (!typesOnly) { -                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition())); +                    builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer()));                  }              } else if (!(                              TEscape::DeclareSignature(name, userType, builder, typesOnly) || diff --git a/yql/essentials/udfs/common/re2/test/canondata/result.json b/yql/essentials/udfs/common/re2/test/canondata/result.json index 84e161d9283..75db00ebaf8 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/result.json +++ b/yql/essentials/udfs/common/re2/test/canondata/result.json @@ -29,14 +29,19 @@              "uri": "file://test.test_InvalidCaptureRegexFail_/extracted"          }      ], -    "test.test[InvalidRegexFail]": [ +    "test.test[InvalidRegexFail_2025.02]": [          { -            "uri": "file://test.test_InvalidRegexFail_/extracted" +            "uri": "file://test.test_InvalidRegexFail_2025.02_/extracted"          }      ], -    "test.test[InvalidRegexSuccess]": [ +    "test.test[InvalidRegexFail_2025.03]": [          { -            "uri": "file://test.test_InvalidRegexSuccess_/results.txt" +            "uri": "file://test.test_InvalidRegexFail_2025.03_/extracted" +        } +    ], +    "test.test[InvalidRegexSuccess_2025.02]": [ +        { +            "uri": "file://test.test_InvalidRegexSuccess_2025.02_/results.txt"          }      ],      "test.test[LikeEscape]": [ diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.02_/extracted index f1fda30e02a..f1fda30e02a 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_/extracted +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.02_/extracted diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted new file mode 100644 index 00000000000..f1fda30e02a --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexFail_2025.03_/extracted @@ -0,0 +1,8 @@ +<tmp_path>/program.sql:<main>: Error: Execution + +    <tmp_path>/program.sql:<main>:8:1: Error: Execution of node: Result +    	SELECT $invalidRe("abaa"); +	^ +        <tmp_path>/program.sql:<main>:4:19: Error: Regexp compilation failed. Regexp: "[". Original error is: "missing ]: [" +        	$invalidRe = Re2::FindAndConsume("["); +	                  ^
\ No newline at end of file diff --git a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_/results.txt b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_2025.02_/results.txt index c37c844a342..c37c844a342 100644 --- a/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_/results.txt +++ b/yql/essentials/udfs/common/re2/test/canondata/test.test_InvalidRegexSuccess_2025.02_/results.txt diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.cfg index 691aa9d9b6f..c0ef97affef 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.cfg +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.cfg @@ -1,2 +1,3 @@  xfail  env YQL_RE2_REGEXP_PROBABILITY_FAIL 100 +langver 2025.02 diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.sql index 087dcab2efd..087dcab2efd 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail.sql +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.02.sql diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg new file mode 100644 index 00000000000..e37bb6b41a2 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.cfg @@ -0,0 +1,2 @@ +langver 2025.03 +xfail diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.sql index 087dcab2efd..087dcab2efd 100644 --- a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess.sql +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexFail_2025.03.sql diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg new file mode 100644 index 00000000000..367bc6a9ec0 --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.cfg @@ -0,0 +1 @@ +langver 2025.02 diff --git a/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql new file mode 100644 index 00000000000..087dcab2efd --- /dev/null +++ b/yql/essentials/udfs/common/re2/test/cases/InvalidRegexSuccess_2025.02.sql @@ -0,0 +1,4 @@ +/* syntax version 1 */ +$invalidRe = Re2::FindAndConsume("["); + +SELECT $invalidRe("abaa"); diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make index 7e554133486..ca8be7370ba 100644 --- a/yql/essentials/udfs/common/re2/ya.make +++ b/yql/essentials/udfs/common/re2/ya.make @@ -2,7 +2,7 @@ YQL_UDF_CONTRIB(re2_udf)      YQL_ABI_VERSION(          2 -        28 +        43          0      )  | 
