summary refs log tree commit diff stats
path: root/library/cpp
diff options
context:
space:
mode:
author uzhas <[email protected]> 2026-03-31 14:17:51 +0300
committer uzhas <[email protected]> 2026-03-31 14:57:34 +0300
commit 94bef549caccdaa3e576c60a343261bd18cf025b (patch)
tree 50bd18ba370aad145bebfb8e32d0aad6d1210d25 /library/cpp
parent ec364e1f6eb7538970410f2ca8f6a9842cc6f3be (diff)
fix library/cpp/regex/pcre
commit_hash:6b6fb1b60f32209f80d85e4e33889e192a35ac07
Diffstat (limited to 'library/cpp')
-rw-r--r-- library/cpp/regex/pcre/regexp.cpp 25
-rw-r--r-- library/cpp/regex/pcre/regexp_ut.cpp 184
2 files changed, 200 insertions, 9 deletions
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp
index 575c09cee40..678eac652ab 100644
--- a/library/cpp/regex/pcre/regexp.cpp
+++ b/library/cpp/regex/pcre/regexp.cpp
@@ -11,6 +11,7 @@ class TGlobalImpl : TNonCopyable {
private:
const char* Str;
regmatch_t* Pmatch;
+ int PmatchSize;
int Options;
int StrLen;
int StartOffset, NotEmptyOpts, MatchPos;
@@ -26,16 +27,17 @@ private:
private:
void CopyResults(int count) {
for (int i = 0; i < count; i++) {
+ if (MatchPos >= PmatchSize) {
+ ythrow yexception() << "TRegExBase::Exec(): Not enough space in pmatch array.";
+ }
Pmatch[MatchPos].rm_so = MatchBuf[2 * i];
Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1];
MatchPos++;
- if (MatchPos >= NMATCHES) {
- ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
- }
}
}
int DoPcreExec(int opts) {
+ const int ovecsize = sizeof(MatchBuf) / sizeof(MatchBuf[0]);
int rc = pcre_exec(
PregComp, /* the compiled pattern */
nullptr, /* no extra data - we didn't study the pattern */
@@ -44,10 +46,11 @@ private:
StartOffset, /* start at offset 0 in the subject */
opts, /* default options */
MatchBuf, /* output vector for substring information */
- NMATCHES); /* number of elements in the output vector */
+ ovecsize); /* number of elements in the output vector */
if (rc == 0) {
- ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
+ ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer. "
+ << "Pattern has too many capture groups for ovector size " << ovecsize << ".";
}
return rc;
@@ -76,16 +79,20 @@ private:
}
public:
- TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)
+ TGlobalImpl(const char* st, regmatch_t* pma, int pma_size, int opts, pcre* pc_re)
: Str(st)
- , Pmatch(&pma)
+ , Pmatch(pma)
+ , PmatchSize(pma_size)
, Options(opts)
, StartOffset(0)
, NotEmptyOpts(0)
, MatchPos(0)
, PregComp(pc_re)
{
- memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);
+ // Initialize entire pmatch array with -1 so caller can detect end of matches
+ memset(Pmatch, -1, sizeof(regmatch_t) * PmatchSize);
+ // Initialize MatchBuf to avoid UB when reading uninitialized values
+ memset(MatchBuf, 0, sizeof(MatchBuf));
StrLen = strlen(Str);
}
@@ -175,7 +182,7 @@ public:
if ((eflags & REG_NOTEOL) != 0)
options |= PCRE_NOTEOL;
- return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal();
+ return TGlobalImpl(str, pmatch, nmatches, options, (pcre*)Preg.re_pcre).ExecGlobal();
}
}
diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp
index 5184e801cc6..f24e33b9e08 100644
--- a/library/cpp/regex/pcre/regexp_ut.cpp
+++ b/library/cpp/regex/pcre/regexp_ut.cpp
@@ -51,6 +51,11 @@ private:
UNIT_TEST(TestRe)
UNIT_TEST(TestSubst)
UNIT_TEST(TestOffEndOfBuffer);
+ UNIT_TEST(TestSmallArray)
+ UNIT_TEST(TestSmallArrayOverflow)
+ UNIT_TEST(Test99CaptureGroups)
+ UNIT_TEST(Test100CaptureGroups)
+ UNIT_TEST(Test300CaptureGroups)
UNIT_TEST_SUITE_END();
inline void TestRe() {
@@ -98,6 +103,185 @@ private:
const TString haystack{"fakty.ictv.ua"};
UNIT_ASSERT_VALUES_EQUAL(re.Match(haystack.c_str()), false);
}
+
+ // Global-mode Exec() with a caller array much smaller than NMATCHES must stay within that array.
+ // Overflow is not asserted directly; a sanitizer build is what would report an out-of-range write.
+ void TestSmallArray() {
+ // Caller-provided result array with only 5 slots
+ regmatch_t smallArray[5] = {};
+
+ // Pattern without capture groups
+ TRegExBase re("a+", REG_EXTENDED);
+
+ const char* testString = "aaa bbb aaa";
+
+ // Run in REGEXP_GLOBAL mode, passing the real array size (5) as the last argument.
+ // TGlobalImpl's constructor memsets exactly that many elements to -1, not NMATCHES.
+ int result = re.Exec(testString, smallArray, REGEXP_GLOBAL, 5);
+
+ // Exec is expected to report success (0)
+ UNIT_ASSERT_VALUES_EQUAL(result, 0);
+
+ // Slots are pre-filled with -1, so a non-negative start offset means a match was stored
+ UNIT_ASSERT(smallArray[0].rm_so >= 0);
+ }
+
+ // One match needs more slots (1 full match + 10 groups = 11) than the caller's array has (5),
+ // while staying far below NMATCHES: CopyResults must throw instead of writing past the array.
+ void TestSmallArrayOverflow() {
+ // Build a pattern of 10 capture groups; each "(\w)" is 4 characters, hence Reserve(10 * 4)
+ TStringBuilder pattern;
+ pattern.Out.Reserve(10 * 4);
+ for (int i = 0; i < 10; i++) {
+ pattern << "(\\w)";
+ }
+
+ TString patternStr = pattern;
+ TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+ // Only 5 slots, fewer than the 11 a single match needs (1 full match + 10 groups)
+ constexpr int SMALL_SIZE = 5;
+ regmatch_t smallArray[SMALL_SIZE] = {};
+
+ // 10-character subject "abcdefghij", one word character per capture group
+ TStringBuilder testString;
+ testString.Out.Reserve(10);
+ for (int i = 0; i < 10; i++) {
+ testString << char('a' + (i % 26));
+ }
+
+ TString testStr = testString;
+
+ // The pmatch bounds check in CopyResults throws before slot index 5 would be written
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ re.Exec(testStr.c_str(), smallArray, REGEXP_GLOBAL, SMALL_SIZE),
+ yexception,
+ "Not enough space in pmatch array"
+ );
+ }
+
+ // Test for 99 capture groups - should succeed
+ // pcre_exec stores offset pairs only in the first 2/3 of ovector, so ovecsize=300 holds 300/3=100 pairs
+ // 99 groups + 1 full match = 100 pairs, which fits exactly (assumes MatchBuf has 300 elements - confirm)
+ void Test99CaptureGroups() {
+ // Build a pattern of 99 capture groups; each "(\w)" is 4 characters, hence Reserve(99 * 4)
+ TStringBuilder pattern;
+ pattern.Out.Reserve(99 * 4);
+ for (int i = 0; i < 99; i++) {
+ pattern << "(\\w)";
+ }
+
+ TString patternStr = pattern;
+ TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+ // Result array with exactly 100 slots: 1 full match + 99 capture groups
+ constexpr int EXPECTED_MATCHES = 100;
+ regmatch_t matches[EXPECTED_MATCHES] = {};
+
+ // 99-character subject cycling through 'a'..'z', one word character per capture group
+ TStringBuilder testString;
+ testString.Out.Reserve(99);
+ for (int i = 0; i < 99; i++) {
+ testString << char('a' + (i % 26));
+ }
+
+ TString testStr = testString;
+ Cerr << "Test99CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+ // Call with REGEXP_GLOBAL to exercise the global-match code path and its bounds checks
+ int result = re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES);
+
+ Cerr << "Test99CaptureGroups result: " << result << Endl;
+
+ // Exec is expected to report success (0) with 99 groups
+ UNIT_ASSERT_VALUES_EQUAL(result, 0);
+
+ // Count filled slots (full match included); unused slots keep the -1 written by the constructor
+ int matchCount = 0;
+ for (int i = 0; i < EXPECTED_MATCHES; i++) {
+ if (matches[i].rm_so == -1) {
+ break;
+ }
+ matchCount++;
+ }
+
+ Cerr << "Captured groups: " << matchCount << Endl;
+
+ // All 100 slots must be filled (1 full match + 99 groups)
+ UNIT_ASSERT_VALUES_EQUAL(matchCount, 100);
+ }
+
+ // Test for 100 capture groups - should throw exception
+ // 100 groups + 1 full match = 101 offset pairs, one more than the ovecsize/3=100 that ovector can hold
+ // pcre_exec then returns rc=0 (ovector too small), which DoPcreExec turns into a yexception
+ void Test100CaptureGroups() {
+ // Build a pattern of 100 capture groups; each "(\w)" is 4 characters, hence Reserve(100 * 4)
+ TStringBuilder pattern;
+ pattern.Out.Reserve(100 * 4);
+ for (int i = 0; i < 100; i++) {
+ pattern << "(\\w)";
+ }
+
+ TString patternStr = pattern;
+ TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+ // The pmatch array is large enough (101 slots), so only the internal ovector limit can fail
+ constexpr int EXPECTED_MATCHES = 101;
+ regmatch_t matches[EXPECTED_MATCHES] = {};
+
+ // 100-character subject cycling through 'a'..'z', one word character per capture group
+ TStringBuilder testString;
+ testString.Out.Reserve(100);
+ for (int i = 0; i < 100; i++) {
+ testString << char('a' + (i % 26));
+ }
+
+ TString testStr = testString;
+ Cerr << "Test100CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+ // Expect the internal-buffer exception from DoPcreExec, not the pmatch-array one
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES),
+ yexception,
+ "Not enough space in internal buffer"
+ );
+ }
+
+ // Test for 300 capture groups - should throw exception
+ // 300 groups + 1 full match = 301 offset pairs, far more than the ovecsize/3=100 that ovector can hold
+ // pcre_exec then returns rc=0 (ovector too small), which DoPcreExec turns into a yexception
+ void Test300CaptureGroups() {
+ // Build a pattern of 300 capture groups; each "(\w)" is 4 characters, hence Reserve(300 * 4)
+ TStringBuilder pattern;
+ pattern.Out.Reserve(300 * 4);
+ for (int i = 0; i < 300; i++) {
+ pattern << "(\\w)";
+ }
+
+ TString patternStr = pattern;
+ TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+ // The pmatch array is large enough (301 slots), so only the internal ovector limit can fail
+ constexpr int EXPECTED_MATCHES = 301;
+ regmatch_t matches[EXPECTED_MATCHES] = {};
+
+ // 300-character subject cycling through 'a'..'z', one word character per capture group
+ TStringBuilder testString;
+ testString.Out.Reserve(300);
+ for (int i = 0; i < 300; i++) {
+ testString << char('a' + (i % 26));
+ }
+
+ TString testStr = testString;
+ Cerr << "Test300CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+ // Expect the internal-buffer exception from DoPcreExec, not the pmatch-array one
+ UNIT_ASSERT_EXCEPTION_CONTAINS(
+ re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES),
+ yexception,
+ "Not enough space in internal buffer"
+ );
+ }
};
UNIT_TEST_SUITE_REGISTRATION(TRegexpTest);