diff options
author | iblinnikov <iblinnikov@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
commit | b420f761873190614f41ed39c6d96bd3dc14fd00 (patch) | |
tree | 88f25d43498bcac1aa20e80ca2979e17a9f95018 /library/cpp/regex | |
parent | 1916d87e4a1be8b60140240d49f0572a22e54bf8 (diff) | |
download | ydb-b420f761873190614f41ed39c6d96bd3dc14fd00.tar.gz |
Restoring authorship annotation for <iblinnikov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 128 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 162 |
2 files changed, 145 insertions, 145 deletions
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index 94bba4064b..92ce16e14f 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -48,7 +48,7 @@ namespace NRegExp { }; static inline NPire::TFsm Parse(const TStringBuf& regexp, - const TOptions& opts, const bool needDetermine = true) { + const TOptions& opts, const bool needDetermine = true) { NPire::TLexer lexer; if (opts.Charset == CODES_UNKNOWN) { lexer.Assign(regexp.data(), regexp.data() + regexp.size()); @@ -98,9 +98,9 @@ namespace NRegExp { ret.Surround(); } - if (needDetermine) { - ret.Determine(); - } + if (needDetermine) { + ret.Determine(); + } return ret; } @@ -113,8 +113,8 @@ namespace NRegExp { public: inline explicit TFsmParser(const TStringBuf& regexp, - const TOptions& opts = TOptions(), bool needDetermine = true) - : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) + const TOptions& opts = TOptions(), bool needDetermine = true) + : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) { } @@ -177,19 +177,19 @@ namespace NRegExp { } }; - struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { + struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { inline explicit TSlowCapturingFsm(const TStringBuf& regexp, - TOptions opts = TOptions()) - : TFsmParser<TScanner>(regexp, - opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) { - } - - inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm) - : TFsmParser<TScanner>(fsm) - { - } - }; - + TOptions opts = TOptions()) + : TFsmParser<TScanner>(regexp, + opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) { + } + + inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm) + : TFsmParser<TScanner>(fsm) + { + } + }; + template <class TFsm> class TMatcherBase { public: @@ -285,53 +285,53 @@ namespace NRegExp { private: TStringBuf Data; }; - - class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ - public: - typedef typename TSlowCapturingFsm::TScanner::State TState; - inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm) - : TMatcherBase<TSlowCapturingFsm>(fsm) - , HasCaptured(false) - { - } - - inline bool Captured() const noexcept { - return HasCaptured; - } - - inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { - TStringBuf textData(data, len); - Data = textData; + + class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ + public: + typedef typename TSlowCapturingFsm::TScanner::State TState; + inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm) + : TMatcherBase<TSlowCapturingFsm>(fsm) + , HasCaptured(false) + { + } + + inline bool Captured() const noexcept { + return HasCaptured; + } + + inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { + TStringBuf textData(data, len); + Data = textData; Run(Data.begin(), Data.size(), addBegin, addEnd); - return GetAns(); - } - + return GetAns(); + } + inline TSlowSearcher& Search(const TStringBuf& s) noexcept { return Search(s.data(), s.size()); - } - - inline TStringBuf GetCaptured() const noexcept { - return Ans; - } - - private: - TStringBuf Data; - TStringBuf Ans; - bool HasCaptured; - - inline TSlowSearcher& GetAns() { - auto state = GetState(); - Pire::SlowCapturingScanner::SingleState final; - if (!GetScanner().GetCapture(state, final)) { - HasCaptured = false; - } else { - if (!final.HasEnd()) { - final.SetEnd(Data.size()); - } - Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin()); - HasCaptured = true; - } - return *this; - } - }; + } + + inline TStringBuf GetCaptured() const noexcept { + return Ans; + } + + private: + TStringBuf Data; + TStringBuf Ans; + bool HasCaptured; + + inline TSlowSearcher& GetAns() { + auto state = GetState(); + Pire::SlowCapturingScanner::SingleState final; + if (!GetScanner().GetCapture(state, final)) { + HasCaptured = false; + } else { + if (!final.HasEnd()) { + final.SetEnd(Data.size()); + } + Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin()); + HasCaptured = true; + } + return *this; + } + }; } diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index e7206de9ad..19694cfe59 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -216,95 +216,95 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("xx")); } - Y_UNIT_TEST(SlowCapture) { - TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", - TFsm::TOptions().SetCapture(2)); - TSlowSearcher searcher(fsm); - searcher.Search("http://vkontakte.ru/id100500"); - UNIT_ASSERT(searcher.Captured()); + Y_UNIT_TEST(SlowCapture) { + TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", + TFsm::TOptions().SetCapture(2)); + TSlowSearcher searcher(fsm); + searcher.Search("http://vkontakte.ru/id100500"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("100500")); - } - - Y_UNIT_TEST(SlowCaptureGreedy) { - TSlowCapturingFsm fsm(".*(pref.*suff)"); - TSlowSearcher searcher(fsm); - searcher.Search("pref ala bla pref cla suff dla"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCaptureGreedy) { + TSlowCapturingFsm fsm(".*(pref.*suff)"); + TSlowSearcher searcher(fsm); + searcher.Search("pref ala bla pref cla suff dla"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref cla suff")); - } - - Y_UNIT_TEST(SlowCaptureNonGreedy) { - TSlowCapturingFsm fsm(".*?(pref.*suff)"); - TSlowSearcher searcher(fsm); - searcher.Search("pref ala bla pref cla suff dla"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCaptureNonGreedy) { + TSlowCapturingFsm fsm(".*?(pref.*suff)"); + TSlowSearcher searcher(fsm); + searcher.Search("pref ala bla pref cla suff dla"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref ala bla pref cla suff")); - } - - Y_UNIT_TEST(SlowCapture2) { - TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", - TFsm::TOptions().SetCharset(CODES_UTF8)); - - TSlowSearcher searcher(fsm); - searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture2) { + TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", + TFsm::TOptions().SetCharset(CODES_UTF8)); + + TSlowSearcher searcher(fsm); + searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)")); - } - - Y_UNIT_TEST(SlowCapture3) { - TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); - TSlowSearcher searcher(fsm); - searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture3) { + TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); + TSlowSearcher searcher(fsm); + searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("0x0d0a")); - } - - Y_UNIT_TEST(SlowCapture4) { - TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\""); - TSlowSearcher searcher(fsm); - searcher.Search("\"/away.php?to=http:some.addr\"&id=1"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture4) { + TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\""); + TSlowSearcher searcher(fsm); + searcher.Search("\"/away.php?to=http:some.addr\"&id=1"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("some.addr")); - } - - Y_UNIT_TEST(CapturedEmptySlow) { - TSlowCapturingFsm fsm("Comments=(.*)$"); - TSlowSearcher searcher(fsm); - searcher.Search("And Comments="); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CapturedEmptySlow) { + TSlowCapturingFsm fsm("Comments=(.*)$"); + TSlowSearcher searcher(fsm); + searcher.Search("And Comments="); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); - } - - Y_UNIT_TEST(CaptureInOrFirst) { - TSlowCapturingFsm fsm("(A)|A"); - TSlowSearcher searcher(fsm); - searcher.Search("A"); - UNIT_ASSERT(searcher.Captured()); - } - - Y_UNIT_TEST(CaptureInOrSecond) { - TSlowCapturingFsm fsm("A|(A)"); - TSlowSearcher searcher(fsm); - searcher.Search("A"); - UNIT_ASSERT(!searcher.Captured()); - } - - Y_UNIT_TEST(CaptureOutside) { - TSlowCapturingFsm fsm("((ID=([0-9]+))?)"); - TSlowSearcher searcher(fsm); - searcher.Search("ID="); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CaptureInOrFirst) { + TSlowCapturingFsm fsm("(A)|A"); + TSlowSearcher searcher(fsm); + searcher.Search("A"); + UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CaptureInOrSecond) { + TSlowCapturingFsm fsm("A|(A)"); + TSlowSearcher searcher(fsm); + searcher.Search("A"); + UNIT_ASSERT(!searcher.Captured()); + } + + Y_UNIT_TEST(CaptureOutside) { + TSlowCapturingFsm fsm("((ID=([0-9]+))?)"); + TSlowSearcher searcher(fsm); + searcher.Search("ID="); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); - } - - Y_UNIT_TEST(CaptureInside) { - TSlowCapturingFsm fsm("((ID=([0-9]+))?)", - TFsm::TOptions().SetCapture(2)); - TSlowSearcher searcher(fsm); - searcher.Search("ID="); - UNIT_ASSERT(!searcher.Captured()); - } - + } + + Y_UNIT_TEST(CaptureInside) { + TSlowCapturingFsm fsm("((ID=([0-9]+))?)", + TFsm::TOptions().SetCapture(2)); + TSlowSearcher searcher(fsm); + searcher.Search("ID="); + UNIT_ASSERT(!searcher.Captured()); + } + Y_UNIT_TEST(Pcre2PireTest) { UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); |