diff options
| author | sibiryakov <[email protected]> | 2022-02-10 16:49:33 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:49:33 +0300 | 
| commit | 413709c9be39070df9cbd14ef3ec098591346ebd (patch) | |
| tree | 6a130dff5487ce1eaa161ad316c6c29c998a8ba8 /library/cpp/regex | |
| parent | d8825989a7621fc7275e8cfdba16b4f699fb6359 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
| -rw-r--r-- | library/cpp/regex/pcre/regexp.cpp | 176 | ||||
| -rw-r--r-- | library/cpp/regex/pcre/regexp.h | 8 | ||||
| -rw-r--r-- | library/cpp/regex/pcre/regexp_ut.cpp | 74 | 
3 files changed, 129 insertions, 129 deletions
diff --git a/library/cpp/regex/pcre/regexp.cpp b/library/cpp/regex/pcre/regexp.cpp index 575c09cee40..0461a2907da 100644 --- a/library/cpp/regex/pcre/regexp.cpp +++ b/library/cpp/regex/pcre/regexp.cpp @@ -8,35 +8,35 @@  #include <util/generic/noncopyable.h>  class TGlobalImpl : TNonCopyable { -private: +private:       const char* Str;      regmatch_t* Pmatch; -    int Options; -    int StrLen; -    int StartOffset, NotEmptyOpts, MatchPos; -    int MatchBuf[NMATCHES * 3]; +    int Options;  +    int StrLen;  +    int StartOffset, NotEmptyOpts, MatchPos;  +    int MatchBuf[NMATCHES * 3];       pcre* PregComp; - -    enum StateCode { -        TGI_EXIT, -        TGI_CONTINUE, -        TGI_WALKTHROUGH -    }; - -private: +  +    enum StateCode {  +        TGI_EXIT,  +        TGI_CONTINUE,  +        TGI_WALKTHROUGH  +    };  +  +private:       void CopyResults(int count) { -        for (int i = 0; i < count; i++) { +        for (int i = 0; i < count; i++) {               Pmatch[MatchPos].rm_so = MatchBuf[2 * i];              Pmatch[MatchPos].rm_eo = MatchBuf[2 * i + 1]; -            MatchPos++; -            if (MatchPos >= NMATCHES) { +            MatchPos++;  +            if (MatchPos >= NMATCHES) {                   ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; -            } +            }           } -    } - +    }  +       int DoPcreExec(int opts) { -        int rc = pcre_exec( +        int rc = pcre_exec(               PregComp,    /* the compiled pattern */              nullptr,     /* no extra data - we didn't study the pattern */              Str,         /* the subject string */ @@ -45,37 +45,37 @@ private:              opts,        /* default options */              MatchBuf,    /* output vector for substring information */              NMATCHES);   /* number of elements in the output vector */ - -        if (rc == 0) { +  +        if (rc == 0) {               ythrow yexception() << "TRegExBase::Exec(): Not enough space in internal buffer."; -        } - -        return rc; -    } - -    StateCode CheckEmptyCase() { -        if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string +        }  +  +        return rc;  +    }  +  +    StateCode CheckEmptyCase() {  +        if (MatchBuf[0] == MatchBuf[1]) { // founded an empty string               if (MatchBuf[0] == StrLen) {  // at the end -                return TGI_EXIT; -            } -            NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string -        } -        return TGI_WALKTHROUGH; -    } - -    StateCode CheckNoMatch(int rc) { -        if (rc == PCRE_ERROR_NOMATCH) { +                return TGI_EXIT;  +            }  +            NotEmptyOpts = PCRE_NOTEMPTY | PCRE_ANCHORED; // trying to find non empty string  +        }  +        return TGI_WALKTHROUGH;  +    }  +  +    StateCode CheckNoMatch(int rc) {  +        if (rc == PCRE_ERROR_NOMATCH) {               if (NotEmptyOpts == 0) { -                return TGI_EXIT; -            } +                return TGI_EXIT;  +            }               MatchBuf[1] = StartOffset + 1; // we have failed to find non-empty-string. trying to find again shifting "previous match offset" -            return TGI_CONTINUE; -        } -        return TGI_WALKTHROUGH; -    } - -public: +            return TGI_CONTINUE;  +        }  +        return TGI_WALKTHROUGH;  +    }  +  +public:       TGlobalImpl(const char* st, regmatch_t& pma, int opts, pcre* pc_re)          : Str(st)          , Pmatch(&pma) @@ -84,52 +84,52 @@ public:          , NotEmptyOpts(0)          , MatchPos(0)          , PregComp(pc_re) -    { -        memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES); -        StrLen = strlen(Str); -    } - +    {  +        memset(Pmatch, -1, sizeof(regmatch_t) * NMATCHES);  +        StrLen = strlen(Str);  +    }  +       int ExecGlobal() { -        StartOffset = 0; -        int rc = DoPcreExec(Options); - -        if (rc < 0) { -            return rc; -        } -        CopyResults(rc); -        do { -            NotEmptyOpts = 0; -            StartOffset = MatchBuf[1]; - -            if (CheckEmptyCase() == TGI_EXIT) { -                return 0; -            } - -            rc = DoPcreExec(NotEmptyOpts | Options); - -            switch (CheckNoMatch(rc)) { -                case TGI_CONTINUE: -                    continue; -                case TGI_EXIT: -                    return 0; -                case TGI_WALKTHROUGH: -                default: +        StartOffset = 0;  +        int rc = DoPcreExec(Options);  +  +        if (rc < 0) {  +            return rc;  +        }  +        CopyResults(rc);  +        do {  +            NotEmptyOpts = 0;  +            StartOffset = MatchBuf[1];  +  +            if (CheckEmptyCase() == TGI_EXIT) {  +                return 0;  +            }  +  +            rc = DoPcreExec(NotEmptyOpts | Options);  +  +            switch (CheckNoMatch(rc)) {  +                case TGI_CONTINUE:  +                    continue;  +                case TGI_EXIT:  +                    return 0;  +                case TGI_WALKTHROUGH:  +                default:                       break;              } - -            if (rc < 0) { -                return rc; -            } - -            CopyResults(rc); +  +            if (rc < 0) {  +                return rc;  +            }  +  +            CopyResults(rc);           } while (true); - +           return 0; -    } - -private: -}; +    }  +private:  +};  +   class TRegExBaseImpl: public TAtomicRefCount<TRegExBaseImpl> {      friend class TRegExBase; @@ -198,8 +198,8 @@ bool TRegExBase::IsCompiled() const {  TRegExBase::TRegExBase(const char* re, int cflags) {      if (re) {          Compile(re, cflags); -    } -} +    }  +}   TRegExBase::TRegExBase(const TString& re, int cflags) {      Compile(re, cflags); diff --git a/library/cpp/regex/pcre/regexp.h b/library/cpp/regex/pcre/regexp.h index bc610bd2f34..50c8b35ba4c 100644 --- a/library/cpp/regex/pcre/regexp.h +++ b/library/cpp/regex/pcre/regexp.h @@ -12,18 +12,18 @@  //THIS CODE LOOKS LIKE A TRASH, BUT WORKS.  #define NMATCHES 100 -#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences - +#define REGEXP_GLOBAL 0x0080 // use this if you want to find all occurences  +   class TRegExBaseImpl; -class TRegExBase { +class TRegExBase {   protected:      TSimpleIntrusivePtr<TRegExBaseImpl> Impl;  public:      TRegExBase(const char* regExpr = nullptr, int cflags = REG_EXTENDED);      TRegExBase(const TString& regExpr, int cflags = REG_EXTENDED); - +       virtual ~TRegExBase();      int Exec(const char* str, regmatch_t pmatch[], int eflags, int nmatches = NMATCHES) const; diff --git a/library/cpp/regex/pcre/regexp_ut.cpp b/library/cpp/regex/pcre/regexp_ut.cpp index 5184e801cc6..0df0fdf4ab0 100644 --- a/library/cpp/regex/pcre/regexp_ut.cpp +++ b/library/cpp/regex/pcre/regexp_ut.cpp @@ -1,16 +1,16 @@  #include <library/cpp/testing/unittest/registar.h> - +   #include <util/string/strip.h>  #include <library/cpp/regex/pcre/regexp.h> -#include <util/stream/output.h> - -struct TRegTest { -    const char* Regexp; -    const char* Data; -    const char* Result; -    int CompileOptions; -    int RunOptions; - +#include <util/stream/output.h>  +  +struct TRegTest {  +    const char* Regexp;  +    const char* Data;  +    const char* Result;  +    int CompileOptions;  +    int RunOptions;  +       TRegTest(const char* re, const char* text, const char* res, int copts = REG_EXTENDED, int ropts = 0)          : Regexp(re)          , Data(text) @@ -19,8 +19,8 @@ struct TRegTest {          , RunOptions(ropts)      {      } -}; - +};  +   struct TSubstTest: public TRegTest {      const char* Replacement;      const char* Replacement2; @@ -35,44 +35,44 @@ struct TSubstTest: public TRegTest {  const TRegTest REGTEST_DATA[] = {      TRegTest("test", "its a test and test string.", "6 10", REG_EXTENDED, 0), -    TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL), -    TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL), +    TRegTest("test", "its a test and test string.", "6 10 15 19", REG_EXTENDED, REGEXP_GLOBAL),  +    TRegTest("test|[an]{0,0}", "test and test an test string tes", "0 4 4 4 5 5 6 6 7 7 8 8 9 13 13 13 14 14 15 15 16 16 17 21 21 21 22 22 23 23 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31 32 32", REG_EXTENDED, REGEXP_GLOBAL),       TRegTest("test[an]{1,}", "test and test an test string tes", "NM", REG_EXTENDED, REGEXP_GLOBAL)}; - +   const TSubstTest SUBSTTEST_DATA[] = {      TSubstTest("([a-zA-Z]*[0-9]+) (_[a-z]+)", "Xxx123 534 ___124 bsd _A ZXC _L 141 _sd dsfg QWE123 _bbb", "141 XXX/_sd", "$1 XXX/$2", "$2$2$2 YY$1Y/$2")}; -class TRegexpTest: public TTestBase { -private: -    regmatch_t Matches[NMATCHES]; - -private: -    UNIT_TEST_SUITE(TRegexpTest); +class TRegexpTest: public TTestBase {  +private:  +    regmatch_t Matches[NMATCHES];  +  +private:  +    UNIT_TEST_SUITE(TRegexpTest);       UNIT_TEST(TestRe)      UNIT_TEST(TestSubst)      UNIT_TEST(TestOffEndOfBuffer); -    UNIT_TEST_SUITE_END(); - -    inline void TestRe() { +    UNIT_TEST_SUITE_END();  +  +    inline void TestRe() {           for (const auto& regTest : REGTEST_DATA) { -            memset(Matches, 0, sizeof(Matches)); +            memset(Matches, 0, sizeof(Matches));               TString result; - +               TRegExBase re(regTest.Regexp, regTest.CompileOptions);              if (re.Exec(regTest.Data, Matches, regTest.RunOptions) == 0) {                  for (auto& matche : Matches) {                      if (matche.rm_so == -1) { -                        break; -                    } +                        break;  +                    }                       result.append(Sprintf("%i %i ", matche.rm_so, matche.rm_eo)); -                } -            } else { -                result = "NM"; -            } +                }  +            } else {  +                result = "NM";  +            }               StripInPlace(result);              UNIT_ASSERT_VALUES_EQUAL(result, regTest.Result); -        } -    } +        }  +    }       inline void TestSubst() {          for (const auto& substTest : SUBSTTEST_DATA) { @@ -98,6 +98,6 @@ private:          const TString haystack{"fakty.ictv.ua"};          UNIT_ASSERT_VALUES_EQUAL(re.Match(haystack.c_str()), false);      } -}; - -UNIT_TEST_SUITE_REGISTRATION(TRegexpTest); +};  +  +UNIT_TEST_SUITE_REGISTRATION(TRegexpTest);   | 
