diff options
| author | iblinnikov <[email protected]> | 2022-02-10 16:48:07 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:07 +0300 | 
| commit | ed2bfbca3e30e641448ad350b4305c69e12aff88 (patch) | |
| tree | b222e5ac2e2e98872661c51ccceee5da0d291e13 /library/cpp/regex/pire | |
| parent | b420f761873190614f41ed39c6d96bd3dc14fd00 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/pire')
| -rw-r--r-- | library/cpp/regex/pire/regexp.h | 128 | ||||
| -rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 162 | 
2 files changed, 145 insertions, 145 deletions
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index 92ce16e14f8..94bba4064b7 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -48,7 +48,7 @@ namespace NRegExp {          };          static inline NPire::TFsm Parse(const TStringBuf& regexp, -                                        const TOptions& opts, const bool needDetermine = true) {  +                                        const TOptions& opts, const bool needDetermine = true) {              NPire::TLexer lexer;              if (opts.Charset == CODES_UNKNOWN) {                  lexer.Assign(regexp.data(), regexp.data() + regexp.size()); @@ -98,9 +98,9 @@ namespace NRegExp {                  ret.Surround();              } -            if (needDetermine) {  -                ret.Determine();  -            }  +            if (needDetermine) { +                ret.Determine(); +            }              return ret;          } @@ -113,8 +113,8 @@ namespace NRegExp {      public:          inline explicit TFsmParser(const TStringBuf& regexp, -                                   const TOptions& opts = TOptions(), bool needDetermine = true)  -            : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())  +                                   const TOptions& opts = TOptions(), bool needDetermine = true) +            : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())          {          } @@ -177,19 +177,19 @@ namespace NRegExp {          }      }; -    struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {  +    struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {          inline explicit TSlowCapturingFsm(const TStringBuf& regexp, -                                          TOptions opts = TOptions())  -                : TFsmParser<TScanner>(regexp,  -                                       opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) {  -        }  -  -        inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm)  -                : TFsmParser<TScanner>(fsm)  -        {  -        }  -    };  -  +                                          TOptions opts = TOptions()) +                : TFsmParser<TScanner>(regexp, +                                       opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) { +        } + +        inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm) +                : TFsmParser<TScanner>(fsm) +        { +        } +    }; +      template <class TFsm>      class TMatcherBase {      public: @@ -285,53 +285,53 @@ namespace NRegExp {      private:          TStringBuf Data;      }; -  -    class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{  -    public:  -        typedef typename TSlowCapturingFsm::TScanner::State TState;  -        inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm)  -                : TMatcherBase<TSlowCapturingFsm>(fsm)  -                , HasCaptured(false)  -        {  -        }  -  -        inline bool Captured() const noexcept {  -            return HasCaptured;  -        }  -  -        inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {  -            TStringBuf textData(data, len);  -            Data = textData;  + +    class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ +    public: +        typedef typename TSlowCapturingFsm::TScanner::State TState; +        inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm) +                : TMatcherBase<TSlowCapturingFsm>(fsm) +                , HasCaptured(false) +        { +        } + +        inline bool Captured() const noexcept { +            return HasCaptured; +        } + +        inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { +            TStringBuf textData(data, len); +            Data = textData;              Run(Data.begin(), Data.size(), addBegin, addEnd); -            return GetAns();  -        }  -  +            return GetAns(); +        } +          inline TSlowSearcher& Search(const TStringBuf& s) noexcept {              return Search(s.data(), s.size()); -        }  -  -        inline TStringBuf GetCaptured() const noexcept {  -            return Ans;  -        }  -  -    private:  -        TStringBuf Data;  -        TStringBuf Ans;  -        bool HasCaptured;  -  -        inline TSlowSearcher& GetAns() {  -            auto state = GetState();  -            Pire::SlowCapturingScanner::SingleState final;  -            if (!GetScanner().GetCapture(state, final)) {  -                HasCaptured = false;  -            } else {  -                if (!final.HasEnd()) {  -                    final.SetEnd(Data.size());  -                }  -                Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin());  -                HasCaptured = true;  -            }  -            return *this;  -        }  -    };  +        } + +        inline TStringBuf GetCaptured() const noexcept { +            return Ans; +        } + +    private: +        TStringBuf Data; +        TStringBuf Ans; +        bool HasCaptured; + +        inline TSlowSearcher& GetAns() { +            auto state = GetState(); +            Pire::SlowCapturingScanner::SingleState final; +            if (!GetScanner().GetCapture(state, final)) { +                HasCaptured = false; +            } else { +                if (!final.HasEnd()) { +                    final.SetEnd(Data.size()); +                } +                Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin()); +                HasCaptured = true; +            } +            return *this; +        } +    };  } diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index 19694cfe594..e7206de9ad4 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -216,95 +216,95 @@ Y_UNIT_TEST_SUITE(TRegExp) {          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("xx"));      } -    Y_UNIT_TEST(SlowCapture) {  -        TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",  -                              TFsm::TOptions().SetCapture(2));  -        TSlowSearcher searcher(fsm);  -        searcher.Search("http://vkontakte.ru/id100500");  -        UNIT_ASSERT(searcher.Captured());  +    Y_UNIT_TEST(SlowCapture) { +        TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", +                              TFsm::TOptions().SetCapture(2)); +        TSlowSearcher searcher(fsm); +        searcher.Search("http://vkontakte.ru/id100500"); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("100500")); -    }  -  -    Y_UNIT_TEST(SlowCaptureGreedy) {  -        TSlowCapturingFsm fsm(".*(pref.*suff)");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("pref ala bla pref cla suff dla");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(SlowCaptureGreedy) { +        TSlowCapturingFsm fsm(".*(pref.*suff)"); +        TSlowSearcher searcher(fsm); +        searcher.Search("pref ala bla pref cla suff dla"); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref cla suff")); -    }  -  -    Y_UNIT_TEST(SlowCaptureNonGreedy) {  -        TSlowCapturingFsm fsm(".*?(pref.*suff)");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("pref ala bla pref cla suff dla");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(SlowCaptureNonGreedy) { +        TSlowCapturingFsm fsm(".*?(pref.*suff)"); +        TSlowSearcher searcher(fsm); +        searcher.Search("pref ala bla pref cla suff dla"); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref ala bla pref cla suff")); -    }  -  -    Y_UNIT_TEST(SlowCapture2) {  -        TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",  -                              TFsm::TOptions().SetCharset(CODES_UTF8));  -  -        TSlowSearcher searcher(fsm);  -        searcher.Search("   Здравствуйте, Уважаемый (-ая)!   ");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(SlowCapture2) { +        TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", +                              TFsm::TOptions().SetCharset(CODES_UTF8)); + +        TSlowSearcher searcher(fsm); +        searcher.Search("   Здравствуйте, Уважаемый (-ая)!   "); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)")); -    }  -  -    Y_UNIT_TEST(SlowCapture3) {  -        TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(SlowCapture3) { +        TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); +        TSlowSearcher searcher(fsm); +        searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF"); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("0x0d0a")); -    }  -  -    Y_UNIT_TEST(SlowCapture4) {  -        TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\"");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("\"/away.php?to=http:some.addr\"&id=1");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(SlowCapture4) { +        TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\""); +        TSlowSearcher searcher(fsm); +        searcher.Search("\"/away.php?to=http:some.addr\"&id=1"); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("some.addr")); -    }  -  -    Y_UNIT_TEST(CapturedEmptySlow) {  -        TSlowCapturingFsm fsm("Comments=(.*)$");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("And Comments=");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(CapturedEmptySlow) { +        TSlowCapturingFsm fsm("Comments=(.*)$"); +        TSlowSearcher searcher(fsm); +        searcher.Search("And Comments="); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); -    }  -  -    Y_UNIT_TEST(CaptureInOrFirst) {  -        TSlowCapturingFsm fsm("(A)|A");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("A");  -        UNIT_ASSERT(searcher.Captured());  -    }  -  -    Y_UNIT_TEST(CaptureInOrSecond) {  -        TSlowCapturingFsm fsm("A|(A)");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("A");  -        UNIT_ASSERT(!searcher.Captured());  -    }  -  -    Y_UNIT_TEST(CaptureOutside) {  -        TSlowCapturingFsm fsm("((ID=([0-9]+))?)");  -        TSlowSearcher searcher(fsm);  -        searcher.Search("ID=");  -        UNIT_ASSERT(searcher.Captured());  +    } + +    Y_UNIT_TEST(CaptureInOrFirst) { +        TSlowCapturingFsm fsm("(A)|A"); +        TSlowSearcher searcher(fsm); +        searcher.Search("A"); +        UNIT_ASSERT(searcher.Captured()); +    } + +    Y_UNIT_TEST(CaptureInOrSecond) { +        TSlowCapturingFsm fsm("A|(A)"); +        TSlowSearcher searcher(fsm); +        searcher.Search("A"); +        UNIT_ASSERT(!searcher.Captured()); +    } + +    Y_UNIT_TEST(CaptureOutside) { +        TSlowCapturingFsm fsm("((ID=([0-9]+))?)"); +        TSlowSearcher searcher(fsm); +        searcher.Search("ID="); +        UNIT_ASSERT(searcher.Captured());          UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); -    }  -  -    Y_UNIT_TEST(CaptureInside) {  -        TSlowCapturingFsm fsm("((ID=([0-9]+))?)",  -                              TFsm::TOptions().SetCapture(2));  -        TSlowSearcher searcher(fsm);  -        searcher.Search("ID=");  -        UNIT_ASSERT(!searcher.Captured());  -    }  -  +    } + +    Y_UNIT_TEST(CaptureInside) { +        TSlowCapturingFsm fsm("((ID=([0-9]+))?)", +                              TFsm::TOptions().SetCapture(2)); +        TSlowSearcher searcher(fsm); +        searcher.Search("ID="); +        UNIT_ASSERT(!searcher.Captured()); +    } +      Y_UNIT_TEST(Pcre2PireTest) {          UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)");          UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?");  | 
