about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex
diff options
context:
space:
mode:
author: iblinnikov <iblinnikov@yandex-team.ru> 2022-02-10 16:48:07 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:48:07 +0300
commit: b420f761873190614f41ed39c6d96bd3dc14fd00 (patch)
tree: 88f25d43498bcac1aa20e80ca2979e17a9f95018 /library/cpp/regex
parent: 1916d87e4a1be8b60140240d49f0572a22e54bf8 (diff)
download: ydb-b420f761873190614f41ed39c6d96bd3dc14fd00.tar.gz
Restoring authorship annotation for <iblinnikov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- library/cpp/regex/pire/regexp.h 128
-rw-r--r-- library/cpp/regex/pire/ut/regexp_ut.cpp 162
2 files changed, 145 insertions, 145 deletions
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index 94bba4064b..92ce16e14f 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -48,7 +48,7 @@ namespace NRegExp {
};
static inline NPire::TFsm Parse(const TStringBuf& regexp,
- const TOptions& opts, const bool needDetermine = true) {
+ const TOptions& opts, const bool needDetermine = true) {
NPire::TLexer lexer;
if (opts.Charset == CODES_UNKNOWN) {
lexer.Assign(regexp.data(), regexp.data() + regexp.size());
@@ -98,9 +98,9 @@ namespace NRegExp {
ret.Surround();
}
- if (needDetermine) {
- ret.Determine();
- }
+ if (needDetermine) {
+ ret.Determine();
+ }
return ret;
}
@@ -113,8 +113,8 @@ namespace NRegExp {
public:
inline explicit TFsmParser(const TStringBuf& regexp,
- const TOptions& opts = TOptions(), bool needDetermine = true)
- : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
+ const TOptions& opts = TOptions(), bool needDetermine = true)
+ : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
{
}
@@ -177,19 +177,19 @@ namespace NRegExp {
}
};
- struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
+ struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
inline explicit TSlowCapturingFsm(const TStringBuf& regexp,
- TOptions opts = TOptions())
- : TFsmParser<TScanner>(regexp,
- opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) {
- }
-
- inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm)
- : TFsmParser<TScanner>(fsm)
- {
- }
- };
-
+ TOptions opts = TOptions())
+ : TFsmParser<TScanner>(regexp,
+ opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) {
+ }
+
+ inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm)
+ : TFsmParser<TScanner>(fsm)
+ {
+ }
+ };
+
template <class TFsm>
class TMatcherBase {
public:
@@ -285,53 +285,53 @@ namespace NRegExp {
private:
TStringBuf Data;
};
-
- class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{
- public:
- typedef typename TSlowCapturingFsm::TScanner::State TState;
- inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm)
- : TMatcherBase<TSlowCapturingFsm>(fsm)
- , HasCaptured(false)
- {
- }
-
- inline bool Captured() const noexcept {
- return HasCaptured;
- }
-
- inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
- TStringBuf textData(data, len);
- Data = textData;
+
+ class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{
+ public:
+ typedef typename TSlowCapturingFsm::TScanner::State TState;
+ inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm)
+ : TMatcherBase<TSlowCapturingFsm>(fsm)
+ , HasCaptured(false)
+ {
+ }
+
+ inline bool Captured() const noexcept {
+ return HasCaptured;
+ }
+
+ inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
+ TStringBuf textData(data, len);
+ Data = textData;
Run(Data.begin(), Data.size(), addBegin, addEnd);
- return GetAns();
- }
-
+ return GetAns();
+ }
+
inline TSlowSearcher& Search(const TStringBuf& s) noexcept {
return Search(s.data(), s.size());
- }
-
- inline TStringBuf GetCaptured() const noexcept {
- return Ans;
- }
-
- private:
- TStringBuf Data;
- TStringBuf Ans;
- bool HasCaptured;
-
- inline TSlowSearcher& GetAns() {
- auto state = GetState();
- Pire::SlowCapturingScanner::SingleState final;
- if (!GetScanner().GetCapture(state, final)) {
- HasCaptured = false;
- } else {
- if (!final.HasEnd()) {
- final.SetEnd(Data.size());
- }
- Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin());
- HasCaptured = true;
- }
- return *this;
- }
- };
+ }
+
+ inline TStringBuf GetCaptured() const noexcept {
+ return Ans;
+ }
+
+ private:
+ TStringBuf Data;
+ TStringBuf Ans;
+ bool HasCaptured;
+
+ inline TSlowSearcher& GetAns() {
+ auto state = GetState();
+ Pire::SlowCapturingScanner::SingleState final;
+ if (!GetScanner().GetCapture(state, final)) {
+ HasCaptured = false;
+ } else {
+ if (!final.HasEnd()) {
+ final.SetEnd(Data.size());
+ }
+ Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin());
+ HasCaptured = true;
+ }
+ return *this;
+ }
+ };
}
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index e7206de9ad..19694cfe59 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -216,95 +216,95 @@ Y_UNIT_TEST_SUITE(TRegExp) {
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("xx"));
}
- Y_UNIT_TEST(SlowCapture) {
- TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
- TFsm::TOptions().SetCapture(2));
- TSlowSearcher searcher(fsm);
- searcher.Search("http://vkontakte.ru/id100500");
- UNIT_ASSERT(searcher.Captured());
+ Y_UNIT_TEST(SlowCapture) {
+ TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
+ TFsm::TOptions().SetCapture(2));
+ TSlowSearcher searcher(fsm);
+ searcher.Search("http://vkontakte.ru/id100500");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("100500"));
- }
-
- Y_UNIT_TEST(SlowCaptureGreedy) {
- TSlowCapturingFsm fsm(".*(pref.*suff)");
- TSlowSearcher searcher(fsm);
- searcher.Search("pref ala bla pref cla suff dla");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCaptureGreedy) {
+ TSlowCapturingFsm fsm(".*(pref.*suff)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("pref ala bla pref cla suff dla");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref cla suff"));
- }
-
- Y_UNIT_TEST(SlowCaptureNonGreedy) {
- TSlowCapturingFsm fsm(".*?(pref.*suff)");
- TSlowSearcher searcher(fsm);
- searcher.Search("pref ala bla pref cla suff dla");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCaptureNonGreedy) {
+ TSlowCapturingFsm fsm(".*?(pref.*suff)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("pref ala bla pref cla suff dla");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref ala bla pref cla suff"));
- }
-
- Y_UNIT_TEST(SlowCapture2) {
- TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
- TFsm::TOptions().SetCharset(CODES_UTF8));
-
- TSlowSearcher searcher(fsm);
- searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture2) {
+ TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
+ TFsm::TOptions().SetCharset(CODES_UTF8));
+
+ TSlowSearcher searcher(fsm);
+ searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)"));
- }
-
- Y_UNIT_TEST(SlowCapture3) {
- TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);");
- TSlowSearcher searcher(fsm);
- searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture3) {
+ TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("0x0d0a"));
- }
-
- Y_UNIT_TEST(SlowCapture4) {
- TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\"");
- TSlowSearcher searcher(fsm);
- searcher.Search("\"/away.php?to=http:some.addr\"&id=1");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture4) {
+ TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\"");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("\"/away.php?to=http:some.addr\"&id=1");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("some.addr"));
- }
-
- Y_UNIT_TEST(CapturedEmptySlow) {
- TSlowCapturingFsm fsm("Comments=(.*)$");
- TSlowSearcher searcher(fsm);
- searcher.Search("And Comments=");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CapturedEmptySlow) {
+ TSlowCapturingFsm fsm("Comments=(.*)$");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("And Comments=");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf(""));
- }
-
- Y_UNIT_TEST(CaptureInOrFirst) {
- TSlowCapturingFsm fsm("(A)|A");
- TSlowSearcher searcher(fsm);
- searcher.Search("A");
- UNIT_ASSERT(searcher.Captured());
- }
-
- Y_UNIT_TEST(CaptureInOrSecond) {
- TSlowCapturingFsm fsm("A|(A)");
- TSlowSearcher searcher(fsm);
- searcher.Search("A");
- UNIT_ASSERT(!searcher.Captured());
- }
-
- Y_UNIT_TEST(CaptureOutside) {
- TSlowCapturingFsm fsm("((ID=([0-9]+))?)");
- TSlowSearcher searcher(fsm);
- searcher.Search("ID=");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureInOrFirst) {
+ TSlowCapturingFsm fsm("(A)|A");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("A");
+ UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureInOrSecond) {
+ TSlowCapturingFsm fsm("A|(A)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("A");
+ UNIT_ASSERT(!searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureOutside) {
+ TSlowCapturingFsm fsm("((ID=([0-9]+))?)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("ID=");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf(""));
- }
-
- Y_UNIT_TEST(CaptureInside) {
- TSlowCapturingFsm fsm("((ID=([0-9]+))?)",
- TFsm::TOptions().SetCapture(2));
- TSlowSearcher searcher(fsm);
- searcher.Search("ID=");
- UNIT_ASSERT(!searcher.Captured());
- }
-
+ }
+
+ Y_UNIT_TEST(CaptureInside) {
+ TSlowCapturingFsm fsm("((ID=([0-9]+))?)",
+ TFsm::TOptions().SetCapture(2));
+ TSlowSearcher searcher(fsm);
+ searcher.Search("ID=");
+ UNIT_ASSERT(!searcher.Captured());
+ }
+
Y_UNIT_TEST(Pcre2PireTest) {
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)");
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?");