about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex
diff options
context:
space:
mode:
author: sibiryakov <sibiryakov@yandex-team.ru> 2022-02-10 16:49:33 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:33 +0300
commit: 7fa2bc85438d6a40801444b0def15f9e76a44034 (patch)
tree: 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/regex
parent: 413709c9be39070df9cbd14ef3ec098591346ebd (diff)
download: ydb-7fa2bc85438d6a40801444b0def15f9e76a44034.tar.gz
Restoring authorship annotation for <sibiryakov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- library/cpp/regex/pcre/regexp.cpp 176
-rw-r--r-- library/cpp/regex/pcre/regexp.h 8
-rw-r--r-- library/cpp/regex/pcre/regexp_ut.cpp 74
3 files changed, 129 insertions, 129 deletions
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp
index 0461a2907d..575c09cee4 100644
--- a/library/cpp/regex/pcre/regexp.cpp
+++ b/library/cpp/regex/pcre/regexp.cpp
@@ -8,35 +8,35 @@
#include <util/generic/noncopyable.h>
class TGlobalImpl : TNonCopyable {
-private:
+private:
const char* Str;
regmatch_t* Pmatch;
- int Options;
- int StrLen;
- int StartOffset, NotEmptyOpts, MatchPos;
- int MatchBuf[NMATCHES * 3];
+ int Options;
+ int StrLen;
+ int StartOffset, NotEmptyOpts, MatchPos;
+ int MatchBuf[NMATCHES * 3];
pcre* PregComp;
-
- enum StateCode {
- TGI_EXIT,
- TGI_CONTINUE,
- TGI_WALKTHROUGH
- };
-
-private:
+
+ enum StateCode {
+ TGI_EXIT,
+ TGI_CONTINUE,
+ TGI_WALKTHROUGH
+ };
+
+private:
void CopyResults(int count) {
- for (int i = 0; i < count; i++) {
+ for (int i = 0; i < count; i++) {
Pmatch[MatchPos].rm_so = MatchBuf[2 * i];
Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1];
- MatchPos++;
- if (MatchPos >= NMATCHES) {
+ MatchPos++;
+ if (MatchPos >= NMATCHES) {
ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
- }
+ }
}
- }
-
+ }
+
int DoPcreExec(int opts) {
- int rc = pcre_exec(
+ int rc = pcre_exec(
PregComp, /* the compiled pattern */
nullptr, /* no extra data - we didn't study the pattern */
Str, /* the subject string */
@@ -45,37 +45,37 @@ private:
opts, /* default options */
MatchBuf, /* output vector for substring information */
NMATCHES); /* number of elements in the output vector */
-
- if (rc == 0) {
+
+ if (rc == 0) {
ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer.";
- }
-
- return rc;
- }
-
- StateCode CheckEmptyCase() {
- if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string
+ }
+
+ return rc;
+ }
+
+ StateCode CheckEmptyCase() {
+ if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string
if (MatchBuf[0] == StrLen) { // at the end
- return TGI_EXIT;
- }
- NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string
- }
- return TGI_WALKTHROUGH;
- }
-
- StateCode CheckNoMatch(int rc) {
- if (rc == PCRE_ERROR_NOMATCH) {
+ return TGI_EXIT;
+ }
+ NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string
+ }
+ return TGI_WALKTHROUGH;
+ }
+
+ StateCode CheckNoMatch(int rc) {
+ if (rc == PCRE_ERROR_NOMATCH) {
if (NotEmptyOpts == 0) {
- return TGI_EXIT;
- }
+ return TGI_EXIT;
+ }
MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset"
- return TGI_CONTINUE;
- }
- return TGI_WALKTHROUGH;
- }
-
-public:
+ return TGI_CONTINUE;
+ }
+ return TGI_WALKTHROUGH;
+ }
+
+public:
TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)
: Str(st)
, Pmatch(&pma)
@@ -84,52 +84,52 @@ public:
, NotEmptyOpts(0)
, MatchPos(0)
, PregComp(pc_re)
- {
- memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);
- StrLen = strlen(Str);
- }
-
+ {
+ memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);
+ StrLen = strlen(Str);
+ }
+
int ExecGlobal() {
- StartOffset = 0;
- int rc = DoPcreExec(Options);
-
- if (rc < 0) {
- return rc;
- }
- CopyResults(rc);
- do {
- NotEmptyOpts = 0;
- StartOffset = MatchBuf[1];
-
- if (CheckEmptyCase() == TGI_EXIT) {
- return 0;
- }
-
- rc = DoPcreExec(NotEmptyOpts | Options);
-
- switch (CheckNoMatch(rc)) {
- case TGI_CONTINUE:
- continue;
- case TGI_EXIT:
- return 0;
- case TGI_WALKTHROUGH:
- default:
+ StartOffset = 0;
+ int rc = DoPcreExec(Options);
+
+ if (rc < 0) {
+ return rc;
+ }
+ CopyResults(rc);
+ do {
+ NotEmptyOpts = 0;
+ StartOffset = MatchBuf[1];
+
+ if (CheckEmptyCase() == TGI_EXIT) {
+ return 0;
+ }
+
+ rc = DoPcreExec(NotEmptyOpts | Options);
+
+ switch (CheckNoMatch(rc)) {
+ case TGI_CONTINUE:
+ continue;
+ case TGI_EXIT:
+ return 0;
+ case TGI_WALKTHROUGH:
+ default:
break;
}
-
- if (rc < 0) {
- return rc;
- }
-
- CopyResults(rc);
+
+ if (rc < 0) {
+ return rc;
+ }
+
+ CopyResults(rc);
} while (true);
-
+
return 0;
- }
+ }
+
+private:
+};
-private:
-};
-
class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> {
friend class TRegExBase;
@@ -198,8 +198,8 @@ bool TRegExBase::IsCompiled() const {
TRegExBase::TRegExBase(const char* re, int cflags) {
if (re) {
Compile(re, cflags);
- }
-}
+ }
+}
TRegExBase::TRegExBase(const TString& re, int cflags) {
Compile(re, cflags);
diff --git a/library/cpp/regex/pcre/regexp.h b/library/cpp/regex/pcre/regexp.h
index 50c8b35ba4..bc610bd2f3 100644
--- a/library/cpp/regex/pcre/regexp.h
+++ b/library/cpp/regex/pcre/regexp.h
@@ -12,18 +12,18 @@
//THIS CODE LOOKS LIKE A TRASH, BUT WORKS.
#define NMATCHES 100
-#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences
-
+#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences
+
class TRegExBaseImpl;
-class TRegExBase {
+class TRegExBase {
protected:
TSimpleIntrusivePtr<TRegExBaseImpl> Impl;
public:
TRegExBase(const char* regExpr = nullptr, int cflags = REG_EXTENDED);
TRegExBase(const TString& regExpr, int cflags = REG_EXTENDED);
-
+
virtual ~TRegExBase();
int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const;
diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp
index 0df0fdf4ab..5184e801cc 100644
--- a/library/cpp/regex/pcre/regexp_ut.cpp
+++ b/library/cpp/regex/pcre/regexp_ut.cpp
@@ -1,16 +1,16 @@
#include <library/cpp/testing/unittest/registar.h>
-
+
#include <util/string/strip.h>
#include <library/cpp/regex/pcre/regexp.h>
-#include <util/stream/output.h>
-
-struct TRegTest {
- const char* Regexp;
- const char* Data;
- const char* Result;
- int CompileOptions;
- int RunOptions;
-
+#include <util/stream/output.h>
+
+struct TRegTest {
+ const char* Regexp;
+ const char* Data;
+ const char* Result;
+ int CompileOptions;
+ int RunOptions;
+
TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0)
: Regexp(re)
, Data(text)
@@ -19,8 +19,8 @@ struct TRegTest {
, RunOptions(ropts)
{
}
-};
-
+};
+
struct TSubstTest: public TRegTest {
const char* Replacement;
const char* Replacement2;
@@ -35,44 +35,44 @@ struct TSubstTest: public TRegTest {
const TRegTest REGTEST_DATA[] = {
TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0),
- TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL),
- TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL),
+ TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL),
+ TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL),
TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)};
-
+
const TSubstTest SUBSTTEST_DATA[] = {
TSubstTest("([a-zA-Z]*[0-9]+) (_[a-z]+)", "Xxx123 534 ___124 bsd _A ZXC _L 141 _sd dsfg QWE123 _bbb", "141 XXX/_sd", "$1 XXX/$2", "$2$2$2 YY$1Y/$2")};
-class TRegexpTest: public TTestBase {
-private:
- regmatch_t Matches[NMATCHES];
-
-private:
- UNIT_TEST_SUITE(TRegexpTest);
+class TRegexpTest: public TTestBase {
+private:
+ regmatch_t Matches[NMATCHES];
+
+private:
+ UNIT_TEST_SUITE(TRegexpTest);
UNIT_TEST(TestRe)
UNIT_TEST(TestSubst)
UNIT_TEST(TestOffEndOfBuffer);
- UNIT_TEST_SUITE_END();
-
- inline void TestRe() {
+ UNIT_TEST_SUITE_END();
+
+ inline void TestRe() {
for (const auto& regTest : REGTEST_DATA) {
- memset(Matches, 0, sizeof(Matches));
+ memset(Matches, 0, sizeof(Matches));
TString result;
-
+
TRegExBase re(regTest.Regexp, regTest.CompileOptions);
if (re.Exec(regTest.Data, Matches, regTest.RunOptions) == 0) {
for (auto& matche : Matches) {
if (matche.rm_so == -1) {
- break;
- }
+ break;
+ }
result.append(Sprintf("%i %i ", matche.rm_so, matche.rm_eo));
- }
- } else {
- result = "NM";
- }
+ }
+ } else {
+ result = "NM";
+ }
StripInPlace(result);
UNIT_ASSERT_VALUES_EQUAL(result, regTest.Result);
- }
- }
+ }
+ }
inline void TestSubst() {
for (const auto& substTest : SUBSTTEST_DATA) {
@@ -98,6 +98,6 @@ private:
const TString haystack{"fakty.ictv.ua"};
UNIT_ASSERT_VALUES_EQUAL(re.Match(haystack.c_str()), false);
}
-};
-
-UNIT_TEST_SUITE_REGISTRATION(TRegexpTest);
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TRegexpTest);