about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author fedor-miron <fedor-miron@yandex-team.com> 2023-10-09 22:34:20 +0300
committer fedor-miron <fedor-miron@yandex-team.com> 2023-10-09 22:50:50 +0300
commit bbf2b6878af3854815a2c0ecb07a687071787639 (patch)
tree 1d580c75738617c1acc8a9956eb762ad39130f55
parent 8c020b4b0b9323674b5c40875fed4ffb9fada74a (diff)
download ydb-bbf2b6878af3854815a2c0ecb07a687071787639.tar.gz
YQL-16462: add diagnostic on duplicate capturing group names
-rw-r--r-- ydb/library/yql/udfs/common/re2/re2_udf.cpp 6
-rw-r--r-- ydb/library/yql/udfs/common/re2/test/canondata/result.json 5
-rw-r--r-- ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted 14
-rw-r--r-- ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg 1
-rw-r--r-- ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql 4
5 files changed, 30 insertions, 0 deletions
diff --git a/ydb/library/yql/udfs/common/re2/re2_udf.cpp b/ydb/library/yql/udfs/common/re2/re2_udf.cpp
index 6b261f4019..cfc301479d 100644
--- a/ydb/library/yql/udfs/common/re2/re2_udf.cpp
+++ b/ydb/library/yql/udfs/common/re2/re2_udf.cpp
@@ -457,6 +457,7 @@ namespace {
const auto& groupNames = regexp.CapturingGroupNames();
int groupCount = regexp.NumberOfCapturingGroups();
if (groupCount >= 0) {
+ std::unordered_set<std::string_view> groupNamesSet;
int unnamedCount = 0;
++groupCount;
groups.Indexes.resize(groupCount);
@@ -465,6 +466,11 @@ namespace {
TString fieldName;
auto it = groupNames.find(i);
if (it != groupNames.end()) {
+ if (!groupNamesSet.insert(it->second).second) {
+ builder.SetError(
+ TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second);
+ return;
+ }
fieldName = it->second;
} else {
fieldName = "_" + ToString(unnamedCount);
diff --git a/ydb/library/yql/udfs/common/re2/test/canondata/result.json b/ydb/library/yql/udfs/common/re2/test/canondata/result.json
index 2be3e88681..5a7f2dafc9 100644
--- a/ydb/library/yql/udfs/common/re2/test/canondata/result.json
+++ b/ydb/library/yql/udfs/common/re2/test/canondata/result.json
@@ -19,6 +19,11 @@
"uri": "file://test.test_DefOptions_/results.txt"
}
],
+ "test.test[MultipleCaptureGroups]": [
+ {
+ "uri": "file://test.test_MultipleCaptureGroups_/extracted"
+ }
+ ],
"test.test[MutableLambda]": [
{
"uri": "file://test.test_MutableLambda_/results.txt"
diff --git a/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted b/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
new file mode 100644
index 0000000000..2441849448
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/canondata/test.test_MultipleCaptureGroups_/extracted
@@ -0,0 +1,14 @@
+<tmp_path>/program.sql:<main>: Error: Type annotation
+
+ <tmp_path>/program.sql:<main>:8:1: Error: At function: RemovePrefixMembers, At function: Unordered, At function: PersistableRepr, At function: OrderedSqlProject, At function: SqlProjectItem
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:8:8: Error: At function: Apply
+ select $regexp("abc");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: At function: Udf, At Re2.Capture
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^
+ <tmp_path>/program.sql:<main>:4:16: Error: Failed to find UDF function: Re2.Capture, reason: Error: Module: Re2, function: Capture, error: Regexp contains duplicate capturing group name: groupname1
+ $regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+ ^ \ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
new file mode 100644
index 0000000000..eb2e5315d1
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.cfg
@@ -0,0 +1 @@
+xfail \ No newline at end of file
diff --git a/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
new file mode 100644
index 0000000000..49e0da34fd
--- /dev/null
+++ b/ydb/library/yql/udfs/common/re2/test/cases/MultipleCaptureGroups.sql
@@ -0,0 +1,4 @@
+/* syntax version 1 */
+$regexp = Re2::Capture("(?P<groupname1>a)(?P<groupname2>b)(?<groupname1>c)");
+
+select $regexp("abc"); \ No newline at end of file