diff options
| author | uzhas <[email protected]> | 2026-03-31 14:17:51 +0300 |
|---|---|---|
| committer | uzhas <[email protected]> | 2026-03-31 14:57:34 +0300 |
| commit | 94bef549caccdaa3e576c60a343261bd18cf025b (patch) | |
| tree | 50bd18ba370aad145bebfb8e32d0aad6d1210d25 /library/cpp | |
| parent | ec364e1f6eb7538970410f2ca8f6a9842cc6f3be (diff) | |
fix library/cpp/regex/pcre
commit_hash:6b6fb1b60f32209f80d85e4e33889e192a35ac07
Diffstat (limited to 'library/cpp')
| -rw-r--r-- | library/cpp/regex/pcre/regexp.cpp | 25 | ||||
| -rw-r--r-- | library/cpp/regex/pcre/regexp_ut.cpp | 184 |
2 files changed, 200 insertions, 9 deletions
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp
index 575c09cee40..678eac652ab 100644
--- a/library/cpp/regex/pcre/regexp.cpp
+++ b/library/cpp/regex/pcre/regexp.cpp
@@ -11,6 +11,7 @@ class TGlobalImpl : TNonCopyable {
 private:
     const char* Str;
     regmatch_t* Pmatch;
+    int PmatchSize;
     int Options;
     int StrLen;
     int StartOffset, NotEmptyOpts, MatchPos;
@@ -26,16 +27,17 @@ private:
 private:
     void CopyResults(int count) {
         for (int i = 0; i < count; i++) {
+            if (MatchPos >= PmatchSize) {
+                ythrow yexception() << "TRegExBase::Exec(): Not enough space in pmatch array.";
+            }
             Pmatch[MatchPos].rm_so = MatchBuf[2 * i];
             Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1];
             MatchPos++;
-            if (MatchPos >= NMATCHES) {
-                ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
-            }
         }
     }
 
     int DoPcreExec(int opts) {
+        const int ovecsize = sizeof(MatchBuf) / sizeof(MatchBuf[0]);
         int rc = pcre_exec(
             PregComp, /* the compiled pattern */
             nullptr, /* no extra data - we didn't study the pattern */
@@ -44,10 +46,11 @@ private:
             StartOffset, /* start at offset 0 in the subject */
             opts, /* default options */
             MatchBuf, /* output vector for substring information */
-            NMATCHES); /* number of elements in the output vector */
+            ovecsize); /* number of elements in the output vector */
 
         if (rc == 0) {
-            ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
+            ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer. "
+                                << "Pattern has too many capture groups for ovector size " << ovecsize << ".";
         }
 
         return rc;
@@ -76,16 +79,20 @@ private:
     }
 
 public:
-    TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)
+    TGlobalImpl(const char* st, regmatch_t* pma, int pma_size, int opts, pcre* pc_re)
         : Str(st)
-        , Pmatch(&pma)
+        , Pmatch(pma)
+        , PmatchSize(pma_size)
         , Options(opts)
         , StartOffset(0)
         , NotEmptyOpts(0)
         , MatchPos(0)
         , PregComp(pc_re)
     {
-        memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);
+        // Initialize entire pmatch array with -1 so caller can detect end of matches
+        memset(Pmatch, -1, sizeof(regmatch_t) * PmatchSize);
+        // Initialize MatchBuf to avoid UB when reading uninitialized values
+        memset(MatchBuf, 0, sizeof(MatchBuf));
         StrLen = strlen(Str);
     }
 
@@ -175,7 +182,7 @@ public:
             if ((eflags & REG_NOTEOL) != 0)
                 options |= PCRE_NOTEOL;
 
-            return TGlobalImpl(str, pmatch[0], options, (pcre*)Preg.re_pcre).ExecGlobal();
+            return TGlobalImpl(str, pmatch, nmatches, options, (pcre*)Preg.re_pcre).ExecGlobal();
         }
     }
 
diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp
index 5184e801cc6..f24e33b9e08 100644
--- a/library/cpp/regex/pcre/regexp_ut.cpp
+++ b/library/cpp/regex/pcre/regexp_ut.cpp
@@ -51,6 +51,11 @@ private:
     UNIT_TEST(TestRe)
     UNIT_TEST(TestSubst)
     UNIT_TEST(TestOffEndOfBuffer);
+    UNIT_TEST(TestSmallArray)
+    UNIT_TEST(TestSmallArrayOverflow)
+    UNIT_TEST(Test99CaptureGroups)
+    UNIT_TEST(Test100CaptureGroups)
+    UNIT_TEST(Test300CaptureGroups)
     UNIT_TEST_SUITE_END();
 
     inline void TestRe() {
@@ -98,6 +103,185 @@ private:
         const TString haystack{"fakty.ictv.ua"};
         UNIT_ASSERT_VALUES_EQUAL(re.Match(haystack.c_str()), false);
     }
+
+    // Test with small array to verify bounds checking works correctly
+    // Sanitizers will detect any buffer overflows
+    void TestSmallArray() {
+        // Create small array of 5 elements
+        regmatch_t smallArray[5] = {};
+
+        // Create regex
+        TRegExBase re("a+", REG_EXTENDED);
+
+        const char* testString = "aaa bbb aaa";
+
+        // Call Exec with REGEXP_GLOBAL and small array
+        // Our fix ensures memset only initializes 5 elements, not NMATCHES
+        int result = re.Exec(testString, smallArray, REGEXP_GLOBAL, 5);
+
+        // Should succeed without buffer overflow
+        UNIT_ASSERT_VALUES_EQUAL(result, 0);
+
+        // Verify we got matches
+        UNIT_ASSERT(smallArray[0].rm_so >= 0);
+    }
+
+    // Test that checks pmatch array bounds when number of capture groups
+    // exceeds pmatch size but is still much less than NMATCHES
+    void TestSmallArrayOverflow() {
+        // Create regex with 10 capture groups
+        TStringBuilder pattern;
+        pattern.Out.Reserve(10 * 4);
+        for (int i = 0; i < 10; i++) {
+            pattern << "(\\w)";
+        }
+
+        TString patternStr = pattern;
+        TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+        // Allocate small array for only 5 matches (less than 10 groups + 1 full match = 11)
+        constexpr int SMALL_SIZE = 5;
+        regmatch_t smallArray[SMALL_SIZE] = {};
+
+        // String with 10 characters
+        TStringBuilder testString;
+        testString.Out.Reserve(10);
+        for (int i = 0; i < 10; i++) {
+            testString << char('a' + (i % 26));
+        }
+
+        TString testStr = testString;
+
+        // Should throw exception because we need 11 matches but array has only 5
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            re.Exec(testStr.c_str(), smallArray, REGEXP_GLOBAL, SMALL_SIZE),
+            yexception,
+            "Not enough space in pmatch array"
+        );
+    }
+
+    // Test for 99 capture groups - should succeed
+    // With ovecsize=300, pcre_exec can handle up to 100 matches (300/3=100)
+    // 99 groups + 1 full match = 100 matches, which fits exactly
+    void Test99CaptureGroups() {
+        // Create regex with 99 capture groups
+        TStringBuilder pattern;
+        pattern.Out.Reserve(99 * 4);
+        for (int i = 0; i < 99; i++) {
+            pattern << "(\\w)";
+        }
+
+        TString patternStr = pattern;
+        TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+        // Allocate array for 100 matches: 1 full match + 99 capture groups
+        constexpr int EXPECTED_MATCHES = 100;
+        regmatch_t matches[EXPECTED_MATCHES] = {};
+
+        // String with 99 characters
+        TStringBuilder testString;
+        testString.Out.Reserve(99);
+        for (int i = 0; i < 99; i++) {
+            testString << char('a' + (i % 26));
+        }
+
+        TString testStr = testString;
+        Cerr << "Test99CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+        // Call with REGEXP_GLOBAL to use our code path with bounds checking
+        int result = re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES);
+
+        Cerr << "Test99CaptureGroups result: " << result << Endl;
+
+        // Should succeed with 99 groups
+        UNIT_ASSERT_VALUES_EQUAL(result, 0);
+
+        // Count found groups
+        int matchCount = 0;
+        for (int i = 0; i < EXPECTED_MATCHES; i++) {
+            if (matches[i].rm_so == -1) {
+                break;
+            }
+            matchCount++;
+        }
+
+        Cerr << "Captured groups: " << matchCount << Endl;
+
+        // Should capture exactly 100 matches (1 full + 99 groups)
+        UNIT_ASSERT_VALUES_EQUAL(matchCount, 100);
+    }
+
+    // Test for 100 capture groups - should throw exception
+    // 100 groups + 1 full match = 101 matches, which exceeds ovecsize/3=100 limit
+    // pcre_exec will return rc=0 indicating insufficient space
+    void Test100CaptureGroups() {
+        // Create regex with 100 capture groups
+        TStringBuilder pattern;
+        pattern.Out.Reserve(100 * 4);
+        for (int i = 0; i < 100; i++) {
+            pattern << "(\\w)";
+        }
+
+        TString patternStr = pattern;
+        TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+        // Allocate array for 101 matches: 1 full match + 100 capture groups
+        constexpr int EXPECTED_MATCHES = 101;
+        regmatch_t matches[EXPECTED_MATCHES] = {};
+
+        // String with 100 characters
+        TStringBuilder testString;
+        testString.Out.Reserve(100);
+        for (int i = 0; i < 100; i++) {
+            testString << char('a' + (i % 26));
+        }
+
+        TString testStr = testString;
+        Cerr << "Test100CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+        // Should throw exception with message about insufficient space
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES),
+            yexception,
+            "Not enough space in internal buffer"
+        );
+    }
+
+    // Test for 300 capture groups - should throw exception
+    // 300 groups + 1 full match = 301 matches, far exceeds ovecsize/3=100 limit
+    // pcre_exec will return rc=0 indicating insufficient space
+    void Test300CaptureGroups() {
+        // Create regex with 300 capture groups
+        TStringBuilder pattern;
+        pattern.Out.Reserve(300 * 4);
+        for (int i = 0; i < 300; i++) {
+            pattern << "(\\w)";
+        }
+
+        TString patternStr = pattern;
+        TRegExBase re(patternStr.c_str(), REG_EXTENDED);
+
+        // Allocate array for 301 matches: 1 full match + 300 capture groups
+        constexpr int EXPECTED_MATCHES = 301;
+        regmatch_t matches[EXPECTED_MATCHES] = {};
+
+        // String with 300 characters
+        TStringBuilder testString;
+        testString.Out.Reserve(300);
+        for (int i = 0; i < 300; i++) {
+            testString << char('a' + (i % 26));
+        }
+
+        TString testStr = testString;
+        Cerr << "Test300CaptureGroups: pattern length = " << patternStr.size() << Endl;
+
+        // Should throw exception with message about insufficient space
+        UNIT_ASSERT_EXCEPTION_CONTAINS(
+            re.Exec(testStr.c_str(), matches, REGEXP_GLOBAL, EXPECTED_MATCHES),
+            yexception,
+            "Not enough space in internal buffer"
+        );
+    }
 };
 
 UNIT_TEST_SUITE_REGISTRATION(TRegexpTest);
