diff options
author | fedor-miron <fedor-miron@yandex-team.com> | 2023-10-09 22:34:20 +0300 |
---|---|---|
committer | fedor-miron <fedor-miron@yandex-team.com> | 2023-10-09 22:50:50 +0300 |
commit | bbf2b6878af3854815a2c0ecb07a687071787639 (patch) | |
tree | 1d580c75738617c1acc8a9956eb762ad39130f55 | |
parent | 8c020b4b0b9323674b5c40875fed4ffb9fada74a (diff) | |
download | ydb-bbf2b6878af3854815a2c0ecb07a687071787639.tar.gz |
YQL-16462: add diagnostic on duplicate capturing group names
5 files changed, 30 insertions, 0 deletions
diff --git a/ydb/library/yql/udfs/common/re2/re2_udf.cpp b/ydb/library/yql/udfs/common/re2/re2_udf.cpp
index 6b261f4019..cfc301479d 100644
--- a/ydb/library/yql/udfs/common/re2/re2_udf.cpp
+++ b/ydb/library/yql/udfs/common/re2/re2_udf.cpp
@@ -457,6 +457,7 @@ namespace {
 const auto& groupNames = regexp.CapturingGroupNames();
 int groupCount = regexp.NumberOfCapturingGroups();
 if (groupCount >= 0) {
+ std::unordered_set<std::string_view> groupNamesSet;
 int unnamedCount = 0;
 ++groupCount;
 groups.Indexes.resize(groupCount);
@@ -465,6 +466,11 @@ namespace {
 TString fieldName;
 auto it = groupNames.find(i);
 if (it != groupNames.end()) {
+ if (!groupNamesSet.insert(it->second).second) {
+ builder.SetError(
+ TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second);
+ return;
+ }
 fieldName = it->second;
 } else {
 fieldName = "_" + ToString(unnamedCount);
diff --git a/ydb/library/yql/udfs/common/re2/test/canondata/result.json b/ydb/library/yql/udfs/common/re2/test/canondata/result.json
index 2be3e88681..5a7f2dafc9 100644
--- a/ydb/library/yql/udfs/common/re2/test/canondata/result.json
+++ b/ydb/library/yql/udfs/common/re2/test/canondata/result.json
@@ -19,6 +19,11 @@
 "uri": "file://test.test_DefOptions_/results.txt"
 }
 ],
+ "test.test[MultipleCaptureGroups]": [
+ {
+ "uri": "file://test.test_MultipleCaptureGroups_/extracted"
+ }
+ ],
 "test.test[MutableLambda]": [
 {
 "uri": "file://test.test_MutableLambda_/results.txt"
diff --git a/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted b/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
new file mode 100644
index 0000000000..2441849448
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:8:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:8:8: Error: At function: Apply
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: At function: Udf, At Re2.Capture
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: Failed to find UDF function: Re2.Capture, reason: Error: Module: Re2, function: Capture, error: Regexp contains duplicate capturing group name: groupname1
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^
\ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
new file mode 100644
index 0000000000..eb2e5315d1
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
@@ -0,0 +1 @@
+xfail
\ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
new file mode 100644
index 0000000000..49e0da34fd
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+
+select $regexp("abc");
\ No newline at end of file |