diff options
author | sibiryakov <sibiryakov@yandex-team.ru> | 2022-02-10 16:49:33 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:33 +0300 |
commit | 7fa2bc85438d6a40801444b0def15f9e76a44034 (patch) | |
tree | 5d5cb817648f650d76cf1076100726fd9b8448e8 /library/cpp/regex | |
parent | 413709c9be39070df9cbd14ef3ec098591346ebd (diff) | |
download | ydb-7fa2bc85438d6a40801444b0def15f9e76a44034.tar.gz |
Restoring authorship annotation for <sibiryakov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/pcre/regexp.cpp | 176 | ||||
-rw-r--r-- | library/cpp/regex/pcre/regexp.h | 8 | ||||
-rw-r--r-- | library/cpp/regex/pcre/regexp_ut.cpp | 74 |
3 files changed, 129 insertions, 129 deletions
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp index 0461a2907d..575c09cee4 100644 --- a/library/cpp/regex/pcre/regexp.cpp +++ b/library/cpp/regex/pcre/regexp.cpp @@ -8,35 +8,35 @@ #include <util/generic/noncopyable.h> class TGlobalImpl : TNonCopyable { -private: +private: const char* Str; regmatch_t* Pmatch; - int Options; - int StrLen; - int StartOffset, NotEmptyOpts, MatchPos; - int MatchBuf[NMATCHES * 3]; + int Options; + int StrLen; + int StartOffset, NotEmptyOpts, MatchPos; + int MatchBuf[NMATCHES * 3]; pcre* PregComp; - - enum StateCode { - TGI_EXIT, - TGI_CONTINUE, - TGI_WALKTHROUGH - }; - -private: + + enum StateCode { + TGI_EXIT, + TGI_CONTINUE, + TGI_WALKTHROUGH + }; + +private: void CopyResults(int count) { - for (int i = 0; i < count; i++) { + for (int i = 0; i < count; i++) { Pmatch[MatchPos].rm_so = MatchBuf[2 * i]; Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1]; - MatchPos++; - if (MatchPos >= NMATCHES) { + MatchPos++; + if (MatchPos >= NMATCHES) { ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; - } + } } - } - + } + int DoPcreExec(int opts) { - int rc = pcre_exec( + int rc = pcre_exec( PregComp, /* the compiled pattern */ nullptr, /* no extra data - we didn't study the pattern */ Str, /* the subject string */ @@ -45,37 +45,37 @@ private: opts, /* default options */ MatchBuf, /* output vector for substring information */ NMATCHES); /* number of elements in the output vector */ - - if (rc == 0) { + + if (rc == 0) { ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; - } - - return rc; - } - - StateCode CheckEmptyCase() { - if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string + } + + return rc; + } + + StateCode CheckEmptyCase() { + if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string if (MatchBuf[0] == StrLen) { // at the end - return TGI_EXIT; - } - NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string - } - return TGI_WALKTHROUGH; - } - - StateCode CheckNoMatch(int rc) { - if (rc == PCRE_ERROR_NOMATCH) { + return TGI_EXIT; + } + NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string + } + return TGI_WALKTHROUGH; + } + + StateCode CheckNoMatch(int rc) { + if (rc == PCRE_ERROR_NOMATCH) { if (NotEmptyOpts == 0) { - return TGI_EXIT; - } + return TGI_EXIT; + } MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset" - return TGI_CONTINUE; - } - return TGI_WALKTHROUGH; - } - -public: + return TGI_CONTINUE; + } + return TGI_WALKTHROUGH; + } + +public: TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re) : Str(st) , Pmatch(&pma) @@ -84,52 +84,52 @@ public: , NotEmptyOpts(0) , MatchPos(0) , PregComp(pc_re) - { - memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES); - StrLen = strlen(Str); - } - + { + memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES); + StrLen = strlen(Str); + } + int ExecGlobal() { - StartOffset = 0; - int rc = DoPcreExec(Options); - - if (rc < 0) { - return rc; - } - CopyResults(rc); - do { - NotEmptyOpts = 0; - StartOffset = MatchBuf[1]; - - if (CheckEmptyCase() == TGI_EXIT) { - return 0; - } - - rc = DoPcreExec(NotEmptyOpts | Options); - - switch (CheckNoMatch(rc)) { - case TGI_CONTINUE: - continue; - case TGI_EXIT: - return 0; - case TGI_WALKTHROUGH: - default: + StartOffset = 0; + int rc = DoPcreExec(Options); + + if (rc < 0) { + return rc; + } + CopyResults(rc); + do { + NotEmptyOpts = 0; + StartOffset = MatchBuf[1]; + + if (CheckEmptyCase() == TGI_EXIT) { + return 0; + } + + rc = DoPcreExec(NotEmptyOpts | Options); + + switch (CheckNoMatch(rc)) { + case TGI_CONTINUE: + continue; + case TGI_EXIT: + return 0; + case TGI_WALKTHROUGH: + default: break; } - - if (rc < 0) { - return rc; - } - - CopyResults(rc); + + if (rc < 0) { + return rc; + } + + CopyResults(rc); } while (true); - + return 0; - } + } + +private: +}; -private: -}; - class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> { friend class TRegExBase; @@ -198,8 +198,8 @@ bool TRegExBase::IsCompiled() const { TRegExBase::TRegExBase(const char* re, int cflags) { if (re) { Compile(re, cflags); - } -} + } +} TRegExBase::TRegExBase(const TString& re, int cflags) { Compile(re, cflags); diff --git a/library/cpp/regex/pcre/regexp.h b/library/cpp/regex/pcre/regexp.h index 50c8b35ba4..bc610bd2f3 100644 --- a/library/cpp/regex/pcre/regexp.h +++ b/library/cpp/regex/pcre/regexp.h @@ -12,18 +12,18 @@ //THIS CODE LOOKS LIKE A TRASH, BUT WORKS. #define NMATCHES 100 -#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences - +#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences + class TRegExBaseImpl; -class TRegExBase { +class TRegExBase { protected: TSimpleIntrusivePtr<TRegExBaseImpl> Impl; public: TRegExBase(const char* regExpr = nullptr, int cflags = REG_EXTENDED); TRegExBase(const TString& regExpr, int cflags = REG_EXTENDED); - + virtual ~TRegExBase(); int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const; diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp index 0df0fdf4ab..5184e801cc 100644 --- a/library/cpp/regex/pcre/regexp_ut.cpp +++ b/library/cpp/regex/pcre/regexp_ut.cpp @@ -1,16 +1,16 @@ #include <library/cpp/testing/unittest/registar.h> - + #include <util/string/strip.h> #include <library/cpp/regex/pcre/regexp.h> -#include <util/stream/output.h> - -struct TRegTest { - const char* Regexp; - const char* Data; - const char* Result; - int CompileOptions; - int RunOptions; - +#include <util/stream/output.h> + +struct TRegTest { + const char* Regexp; + const char* Data; + const char* Result; + int CompileOptions; + int RunOptions; + TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0) : Regexp(re) , Data(text) @@ -19,8 +19,8 @@ struct TRegTest { , RunOptions(ropts) { } -}; - +}; + struct TSubstTest: public TRegTest { const char* Replacement; const char* Replacement2; @@ -35,44 +35,44 @@ struct TSubstTest: public TRegTest { const TRegTest REGTEST_DATA[] = { TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0), - TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL), - TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL), + TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL), + TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL), TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)}; - + const TSubstTest SUBSTTEST_DATA[] = { TSubstTest("([a-zA-Z]*[0-9]+) (_[a-z]+)", "Xxx123 534 ___124 bsd _A ZXC _L 141 _sd dsfg QWE123 _bbb", "141 XXX/_sd", "$1 XXX/$2", "$2$2$2 YY$1Y/$2")}; -class TRegexpTest: public TTestBase { -private: - regmatch_t Matches[NMATCHES]; - -private: - UNIT_TEST_SUITE(TRegexpTest); +class TRegexpTest: public TTestBase { +private: + regmatch_t Matches[NMATCHES]; + +private: + UNIT_TEST_SUITE(TRegexpTest); UNIT_TEST(TestRe) UNIT_TEST(TestSubst) UNIT_TEST(TestOffEndOfBuffer); - UNIT_TEST_SUITE_END(); - - inline void TestRe() { + UNIT_TEST_SUITE_END(); + + inline void TestRe() { for (const auto& regTest : REGTEST_DATA) { - memset(Matches, 0, sizeof(Matches)); + memset(Matches, 0, sizeof(Matches)); TString result; - + TRegExBase re(regTest.Regexp, regTest.CompileOptions); if (re.Exec(regTest.Data, Matches, regTest.RunOptions) == 0) { for (auto& matche : Matches) { if (matche.rm_so == -1) { - break; - } + break; + } result.append(Sprintf("%i %i ", matche.rm_so, matche.rm_eo)); - } - } else { - result = "NM"; - } + } + } else { + result = "NM"; + } StripInPlace(result); UNIT_ASSERT_VALUES_EQUAL(result, regTest.Result); - } - } + } + } inline void TestSubst() { for (const auto& substTest : SUBSTTEST_DATA) { @@ -98,6 +98,6 @@ private: const TString haystack{"fakty.ictv.ua"}; UNIT_ASSERT_VALUES_EQUAL(re.Match(haystack.c_str()), false); } -}; - -UNIT_TEST_SUITE_REGISTRATION(TRegexpTest); +}; + +UNIT_TEST_SUITE_REGISTRATION(TRegexpTest); |