aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoriblinnikov <iblinnikov@yandex-team.ru>2022-02-10 16:48:07 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:48:07 +0300
commitb420f761873190614f41ed39c6d96bd3dc14fd00 (patch)
tree88f25d43498bcac1aa20e80ca2979e17a9f95018
parent1916d87e4a1be8b60140240d49f0572a22e54bf8 (diff)
downloadydb-b420f761873190614f41ed39c6d96bd3dc14fd00.tar.gz
Restoring authorship annotation for <iblinnikov@yandex-team.ru>. Commit 1 of 2.
-rw-r--r--contrib/libs/pire/pire/extra/capture.cpp94
-rw-r--r--contrib/libs/pire/pire/extra/capture.h810
-rw-r--r--contrib/libs/pire/pire/fsm.cpp4
-rw-r--r--contrib/libs/pire/pire/fsm.h2
-rw-r--r--contrib/libs/pire/pire/re_parser.y2
-rw-r--r--contrib/libs/pire/pire/run.h42
-rw-r--r--contrib/libs/pire/pire/scanner_io.cpp44
-rw-r--r--contrib/libs/pire/pire/scanners/slow.h144
-rw-r--r--library/cpp/regex/pire/regexp.h128
-rw-r--r--library/cpp/regex/pire/ut/regexp_ut.cpp162
10 files changed, 716 insertions, 716 deletions
diff --git a/contrib/libs/pire/pire/extra/capture.cpp b/contrib/libs/pire/pire/extra/capture.cpp
index fb4cdf6d81..a3e663af83 100644
--- a/contrib/libs/pire/pire/extra/capture.cpp
+++ b/contrib/libs/pire/pire/extra/capture.cpp
@@ -34,25 +34,25 @@ namespace {
: State(0)
, Pos(pos)
, Level(0)
- , StateRepetition(NoRepetition)
+ , StateRepetition(NoRepetition)
{}
- bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; }
+ bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; }
Term Lex()
{
- wchar32 c = GetChar();
- if (!Accepts(c))
+ wchar32 c = GetChar();
+ if (!Accepts(c))
Error("How did we get here?!..");
- if (c != '(') {
- wchar32 next = PeekChar();
- if (next == '?') {
- StateRepetition = NonGreedyRepetition;
- GetChar();
- }
- else
- StateRepetition = GreedyRepetition;
- }
- else if (State == 0 && Pos > 1)
+ if (c != '(') {
+ wchar32 next = PeekChar();
+ if (next == '?') {
+ StateRepetition = NonGreedyRepetition;
+ GetChar();
+ }
+ else
+ StateRepetition = GreedyRepetition;
+ }
+ else if (State == 0 && Pos > 1)
--Pos;
else if (State == 0 && Pos == 1) {
State = 1;
@@ -60,27 +60,27 @@ namespace {
} else if (State == 1) {
++Level;
}
- if (c == '(')
- return Term(TokenTypes::Open);
- else if (c == '+')
- return Term::Repetition(1, Inf);
- else if (c == '*')
- return Term::Repetition(0, Inf);
- else if (c == '?')
- return Term::Repetition(0, 1);
- else {
- UngetChar(c);
- return Term(0);
- }
+ if (c == '(')
+ return Term(TokenTypes::Open);
+ else if (c == '+')
+ return Term::Repetition(1, Inf);
+ else if (c == '*')
+ return Term::Repetition(0, Inf);
+ else if (c == '?')
+ return Term::Repetition(0, 1);
+ else {
+ UngetChar(c);
+ return Term(0);
+ }
}
void Parenthesized(Fsm& fsm)
{
- if (StateRepetition != NoRepetition) {
- bool greedy = (StateRepetition == GreedyRepetition);
- SetRepetitionMark(fsm, greedy);
- StateRepetition = NoRepetition;
- } else if (State == 1 && Level == 0) {
+ if (StateRepetition != NoRepetition) {
+ bool greedy = (StateRepetition == GreedyRepetition);
+ SetRepetitionMark(fsm, greedy);
+ StateRepetition = NoRepetition;
+ } else if (State == 1 && Level == 0) {
SetCaptureMark(fsm);
State = 2;
} else if (State == 1 && Level > 0)
@@ -90,23 +90,23 @@ namespace {
unsigned State;
size_t Pos;
size_t Level;
- RepetitionTypes StateRepetition;
-
- void SetRepetitionMark(Fsm& fsm, bool greedy)
- {
+ RepetitionTypes StateRepetition;
+
+ void SetRepetitionMark(Fsm& fsm, bool greedy)
+ {
fsm.Resize(fsm.Size() + 1);
- fsm.ConnectFinal(fsm.Size() - 1);
-
- for (size_t state = 0; state < fsm.Size() - 1; ++state)
- if (fsm.IsFinal(state))
- if (greedy)
- fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndRepetition);
- else
- fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndNonGreedyRepetition);
- fsm.ClearFinal();
- fsm.SetFinal(fsm.Size() - 1, true);
- fsm.SetIsDetermined(false);
- }
+ fsm.ConnectFinal(fsm.Size() - 1);
+
+ for (size_t state = 0; state < fsm.Size() - 1; ++state)
+ if (fsm.IsFinal(state))
+ if (greedy)
+ fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndRepetition);
+ else
+ fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndNonGreedyRepetition);
+ fsm.ClearFinal();
+ fsm.SetFinal(fsm.Size() - 1, true);
+ fsm.SetIsDetermined(false);
+ }
void SetCaptureMark(Fsm& fsm)
{
diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h
index 8399914a67..9951de166a 100644
--- a/contrib/libs/pire/pire/extra/capture.h
+++ b/contrib/libs/pire/pire/extra/capture.h
@@ -27,14 +27,14 @@
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/scanners/loaded.h>
-#include <contrib/libs/pire/pire/scanners/multi.h>
-#include <contrib/libs/pire/pire/scanners/slow.h>
+#include <contrib/libs/pire/pire/scanners/multi.h>
+#include <contrib/libs/pire/pire/scanners/slow.h>
#include <contrib/libs/pire/pire/fsm.h>
#include <contrib/libs/pire/pire/re_lexer.h>
-#include <contrib/libs/pire/pire/run.h>
-
-#include <array>
+#include <contrib/libs/pire/pire/run.h>
+#include <array>
+
namespace Pire {
/**
@@ -158,67 +158,67 @@ private:
friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&);
};
-
-enum RepetitionTypes { // They are sorted by their priorities
- NonGreedyRepetition,
- NoRepetition,
- GreedyRepetition,
-};
-
-/**
-* A Slowcapturing scanner.
-* Does not require FSM to be deterministic, uses O(|text| * |regexp|) time, but always returns the correct capture and supports both greedy and non-greedy quantifiers.
-*/
-class SlowCapturingScanner : public SlowScanner {
-public:
- enum {
- Nothing = 0,
- BeginCapture = 1,
- EndCapture = 2,
- EndRepetition = 4,
- EndNonGreedyRepetition = 8,
-
- FinalFlag = 1,
- };
-
- const ui32 ActionsCapture = BeginCapture | EndCapture;
-
- class SingleState {
- public:
- bool Captured() const
- {
- return (m_begin != m_npos && m_end != m_npos);
- }
-
- bool HasBegin() const
- {
- return (m_begin != m_npos);
- }
-
- bool HasEnd() const
- {
- return (m_end != m_npos);
- }
-
- SingleState(size_t num = 0)
- {
- m_state = num;
- m_begin = m_npos;
- m_end = m_npos;
- }
-
- void SetBegin(size_t pos)
- {
- if (m_begin == m_npos)
- m_begin = pos;
- }
-
- void SetEnd(size_t pos)
- {
- if (m_end == m_npos)
- m_end = pos;
- }
-
+
+enum RepetitionTypes { // They are sorted by their priorities
+ NonGreedyRepetition,
+ NoRepetition,
+ GreedyRepetition,
+};
+
+/**
+* A Slowcapturing scanner.
+* Does not require FSM to be deterministic, uses O(|text| * |regexp|) time, but always returns the correct capture and supports both greedy and non-greedy quantifiers.
+*/
+class SlowCapturingScanner : public SlowScanner {
+public:
+ enum {
+ Nothing = 0,
+ BeginCapture = 1,
+ EndCapture = 2,
+ EndRepetition = 4,
+ EndNonGreedyRepetition = 8,
+
+ FinalFlag = 1,
+ };
+
+ const ui32 ActionsCapture = BeginCapture | EndCapture;
+
+ class SingleState {
+ public:
+ bool Captured() const
+ {
+ return (m_begin != m_npos && m_end != m_npos);
+ }
+
+ bool HasBegin() const
+ {
+ return (m_begin != m_npos);
+ }
+
+ bool HasEnd() const
+ {
+ return (m_end != m_npos);
+ }
+
+ SingleState(size_t num = 0)
+ {
+ m_state = num;
+ m_begin = m_npos;
+ m_end = m_npos;
+ }
+
+ void SetBegin(size_t pos)
+ {
+ if (m_begin == m_npos)
+ m_begin = pos;
+ }
+
+ void SetEnd(size_t pos)
+ {
+ if (m_end == m_npos)
+ m_end = pos;
+ }
+
size_t Begin() const
{
return GetBegin();
@@ -229,358 +229,358 @@ public:
return GetEnd();
}
- size_t GetBegin() const
- {
- return m_begin;
- }
-
- size_t GetEnd() const
- {
- return m_end;
- }
-
- size_t GetNum() const
- {
- return m_state;
- }
-
- private:
- size_t m_state;
- size_t m_begin;
- size_t m_end;
- static const size_t m_npos = static_cast<size_t>(-1);
- };
-
- class State {
- public:
- State()
+ size_t GetBegin() const
+ {
+ return m_begin;
+ }
+
+ size_t GetEnd() const
+ {
+ return m_end;
+ }
+
+ size_t GetNum() const
+ {
+ return m_state;
+ }
+
+ private:
+ size_t m_state;
+ size_t m_begin;
+ size_t m_end;
+ static const size_t m_npos = static_cast<size_t>(-1);
+ };
+
+ class State {
+ public:
+ State()
: m_strpos(0)
, m_matched(false) {}
-
- size_t GetPos() const
- {
- return m_strpos;
- }
-
- const SingleState& GetState(size_t pos) const
- {
- return m_states[pos];
- }
-
- void SetPos(size_t newPos)
- {
- m_strpos = newPos;
- }
-
- void PushState(SingleState& st)
- {
- m_states.push_back(st);
- }
-
- void PushState(const SingleState& st)
- {
- m_states.push_back(st);
- }
-
- size_t GetSize() const
- {
- return m_states.size();
- }
-
- const TVector<SingleState>& GetStates() const
- {
- return m_states;
- }
-
- bool IsMatched() const
- {
- return m_matched;
- }
-
- const SingleState& GetMatched() const
- {
- return m_match;
- }
-
- void AddMatch(const SingleState& Matched)
- {
- m_matched = true;
- m_match = Matched;
- }
-
- private:
- TVector<SingleState> m_states;
- size_t m_strpos;
- bool m_matched;
- SingleState m_match;
- };
-
- class Transition {
- private:
- unsigned long m_stateto;
- Action m_action;
-
- public:
- unsigned long GetState() const
- {
- return m_stateto;
- }
-
- Action GetAction() const
- {
- return m_action;
- }
-
- Transition(unsigned long state, Action act = 0)
+
+ size_t GetPos() const
+ {
+ return m_strpos;
+ }
+
+ const SingleState& GetState(size_t pos) const
+ {
+ return m_states[pos];
+ }
+
+ void SetPos(size_t newPos)
+ {
+ m_strpos = newPos;
+ }
+
+ void PushState(SingleState& st)
+ {
+ m_states.push_back(st);
+ }
+
+ void PushState(const SingleState& st)
+ {
+ m_states.push_back(st);
+ }
+
+ size_t GetSize() const
+ {
+ return m_states.size();
+ }
+
+ const TVector<SingleState>& GetStates() const
+ {
+ return m_states;
+ }
+
+ bool IsMatched() const
+ {
+ return m_matched;
+ }
+
+ const SingleState& GetMatched() const
+ {
+ return m_match;
+ }
+
+ void AddMatch(const SingleState& Matched)
+ {
+ m_matched = true;
+ m_match = Matched;
+ }
+
+ private:
+ TVector<SingleState> m_states;
+ size_t m_strpos;
+ bool m_matched;
+ SingleState m_match;
+ };
+
+ class Transition {
+ private:
+ unsigned long m_stateto;
+ Action m_action;
+
+ public:
+ unsigned long GetState() const
+ {
+ return m_stateto;
+ }
+
+ Action GetAction() const
+ {
+ return m_action;
+ }
+
+ Transition(unsigned long state, Action act = 0)
: m_stateto(state)
, m_action(act)
- {
- }
- };
-
- class PriorityStates {
- private:
- TVector<SingleState> m_nonGreedy;
- TVector<SingleState> m_nothing;
- TVector<SingleState> m_greedy;
-
- public:
- void Push(const SingleState& st, RepetitionTypes repetition)
- {
- Get(repetition).push_back(st);
- }
-
- TVector<SingleState>& Get(RepetitionTypes repetition)
- {
- switch (repetition) {
- case NonGreedyRepetition:
- return m_nonGreedy;
- case NoRepetition:
- return m_nothing;
- case GreedyRepetition:
- return m_greedy;
- }
- }
-
- const TVector<SingleState>& Get(RepetitionTypes repetition) const
- {
- switch (repetition) {
- case NonGreedyRepetition:
- return m_nonGreedy;
- case NoRepetition:
- return m_nothing;
- case GreedyRepetition:
- return m_greedy;
- }
- }
- };
-
- SlowScanner::State GetNextStates(const SingleState& cur, Char letter) const
- {
+ {
+ }
+ };
+
+ class PriorityStates {
+ private:
+ TVector<SingleState> m_nonGreedy;
+ TVector<SingleState> m_nothing;
+ TVector<SingleState> m_greedy;
+
+ public:
+ void Push(const SingleState& st, RepetitionTypes repetition)
+ {
+ Get(repetition).push_back(st);
+ }
+
+ TVector<SingleState>& Get(RepetitionTypes repetition)
+ {
+ switch (repetition) {
+ case NonGreedyRepetition:
+ return m_nonGreedy;
+ case NoRepetition:
+ return m_nothing;
+ case GreedyRepetition:
+ return m_greedy;
+ }
+ }
+
+ const TVector<SingleState>& Get(RepetitionTypes repetition) const
+ {
+ switch (repetition) {
+ case NonGreedyRepetition:
+ return m_nonGreedy;
+ case NoRepetition:
+ return m_nothing;
+ case GreedyRepetition:
+ return m_greedy;
+ }
+ }
+ };
+
+ SlowScanner::State GetNextStates(const SingleState& cur, Char letter) const
+ {
SlowScanner::State st(GetSize());
st.states.push_back(cur.GetNum());
SlowScanner::State nextState(GetSize());
SlowScanner::NextTranslated(st, nextState, letter);
return nextState;
- }
-
- size_t GetPosition(const SingleState& state, Char letter) const
- {
- return state.GetNum() * GetLettersCount() + letter;
- }
-
- void NextStates(const SingleState& state, Char letter, TVector<Transition>& nextStates) const
- {
- if (IsMmaped()) {
- const size_t* pos = GetJumpPos() + GetPosition(state, letter);
- size_t posBegin = pos[0];
- size_t posEnd = pos[1];
- for (size_t i = posBegin; i < posEnd; ++i)
- nextStates.emplace_back(GetJump(i), GetAction(i));
- } else {
- size_t num = GetPosition(state, letter);
- const auto& jumpVec = GetJumpsVec(num);
- const auto& actionVec = GetActionsVec(num);
- for (size_t i = 0; i < jumpVec.size(); ++i)
- nextStates.emplace_back(jumpVec[i], actionVec[i]);
- }
- }
-
- void InsertStates(const PriorityStates& states, TVector<SingleState>& nonGreedy, TVector<SingleState>& nothing, TVector<SingleState>& greedy) const
- {
- for (auto& greed : {ymake_pair(&nonGreedy, NonGreedyRepetition), ymake_pair(&nothing, NoRepetition), ymake_pair(&greedy, GreedyRepetition)}) {
- auto& vec = greed.first;
- auto& tag = greed.second;
- vec->insert(vec->end(), states.Get(tag).begin(), states.Get(tag).end());
- }
- }
-
- void NextAndGetToGroups(PriorityStates& states, const SingleState& cur,
+ }
+
+ size_t GetPosition(const SingleState& state, Char letter) const
+ {
+ return state.GetNum() * GetLettersCount() + letter;
+ }
+
+ void NextStates(const SingleState& state, Char letter, TVector<Transition>& nextStates) const
+ {
+ if (IsMmaped()) {
+ const size_t* pos = GetJumpPos() + GetPosition(state, letter);
+ size_t posBegin = pos[0];
+ size_t posEnd = pos[1];
+ for (size_t i = posBegin; i < posEnd; ++i)
+ nextStates.emplace_back(GetJump(i), GetAction(i));
+ } else {
+ size_t num = GetPosition(state, letter);
+ const auto& jumpVec = GetJumpsVec(num);
+ const auto& actionVec = GetActionsVec(num);
+ for (size_t i = 0; i < jumpVec.size(); ++i)
+ nextStates.emplace_back(jumpVec[i], actionVec[i]);
+ }
+ }
+
+ void InsertStates(const PriorityStates& states, TVector<SingleState>& nonGreedy, TVector<SingleState>& nothing, TVector<SingleState>& greedy) const
+ {
+ for (auto& greed : {ymake_pair(&nonGreedy, NonGreedyRepetition), ymake_pair(&nothing, NoRepetition), ymake_pair(&greedy, GreedyRepetition)}) {
+ auto& vec = greed.first;
+ auto& tag = greed.second;
+ vec->insert(vec->end(), states.Get(tag).begin(), states.Get(tag).end());
+ }
+ }
+
+ void NextAndGetToGroups(PriorityStates& states, const SingleState& cur,
Char letter, size_t pos, TVector<bool>& used) const
- {
- TVector<Transition> nextStates;
- NextStates(cur, letter, nextStates);
- for (const auto& trans : nextStates) {
- size_t st = trans.GetState();
- if (used[st])
- continue;
- used[st] = true;
- SingleState state(st);
- const auto& action = trans.GetAction();
- state.SetBegin(cur.GetBegin());
- state.SetEnd(cur.GetEnd());
- if (action & BeginCapture && !cur.HasBegin()) {
- state.SetBegin(pos);
- }
- if (action & EndCapture && !cur.HasEnd()) {
- state.SetEnd(pos);
- }
- PriorityStates statesInside;
- NextAndGetToGroups(statesInside, state, Translate(Epsilon), pos, used);
- statesInside.Push(state, NoRepetition);
- if (action & EndNonGreedyRepetition) {
- auto& nongreedy = states.Get(NonGreedyRepetition);
- InsertStates(statesInside, nongreedy, nongreedy, nongreedy);
- }
- else if (!(action & EndRepetition))
- InsertStates(statesInside, states.Get(NonGreedyRepetition), states.Get(NoRepetition), states.Get(GreedyRepetition));
- else {
- auto& greedy = states.Get(GreedyRepetition);
- InsertStates(statesInside, greedy, greedy, greedy);
- }
- }
- }
-
- bool Captured(const SingleState& st, bool& matched) const
- {
- matched = false;
- if (IsFinal(st.GetNum())) {
- matched = true;
- if (st.HasBegin())
- return true;
- }
- TVector<Transition> nextStates;
- NextStates(st, Translate(EndMark), nextStates);
- for (const auto& trans : nextStates)
- {
- size_t state = trans.GetState();
- if (IsFinal(state)) {
- matched = true;
- if (st.HasBegin() || (trans.GetAction() & ActionsCapture))
- return true;
- } else { // After EndMark there can be Epsilon-transitions to the Final State
- TVector<Transition> epsilonTrans;
- SingleState newSt(state);
- NextStates(newSt, Translate(Epsilon), epsilonTrans);
- for (auto new_trans : epsilonTrans) {
- size_t fin = new_trans.GetState();
- if (IsFinal(fin)) {
- matched = true;
- if (st.HasBegin() || (trans.GetAction() & ActionsCapture))
- return true;
- }
- }
- }
- }
- return false;
- }
-
- bool Matched(const SingleState& st) const
- {
- bool matched;
- Captured(st, matched);
- return matched;
- }
-
- bool GetCapture(const State& st, SingleState& final) const
- {
- size_t pos = 0;
- bool matched = false;
- bool ans = false;
- while (pos < st.GetSize() && !matched) {
- ans = Captured(st.GetState(pos), matched);
- ++pos;
- }
- if (matched) {
- final = st.GetState(pos - 1);
- return ans;
- } else {
- if (st.IsMatched()) {
- final = st.GetMatched();
- return true;
- }
- return false;
- }
- }
-
- bool PushState(State& nlist, const SingleState& state) const
- {
- nlist.PushState(state);
- if (Matched(state)) {
- nlist.AddMatch(state);
- return true;
- }
- return false;
- }
-
- void UpdateNList(State& nlist, const PriorityStates& states) const
- {
- static constexpr std::array<RepetitionTypes, 3> m_type_by_priority{{NonGreedyRepetition, NoRepetition, GreedyRepetition}};
- for (const auto type : m_type_by_priority) {
- for (const auto& state : states.Get(type)) {
- if (PushState(nlist, state)) // Because we have strict priorities, after matching some state, we can be sure, that not states after will be better
- return;
- }
- }
- }
-
- void Initialize(State& nlist) const
- {
- PriorityStates states;
- SingleState init(GetStart());
- TVector<bool> used(GetSize());
- NextAndGetToGroups(states, init, Translate(BeginMark), 0, used);
- NextAndGetToGroups(states, 0, Translate(BeginMark), 0, used);
- UpdateNList(nlist, states);
- }
-
- Action NextTranslated(State& clist, Char letter) const
- {
- State nlist;
- if (clist.IsMatched())
- nlist.AddMatch(clist.GetMatched());
- nlist.SetPos(clist.GetPos() + 1);
- size_t strpos = nlist.GetPos();
- TVector<bool> used(GetSize());
- size_t pos = 0;
- while (pos < clist.GetSize()) {
- PriorityStates states;
- NextAndGetToGroups(states, clist.GetState(pos), letter, strpos, used);
- UpdateNList(nlist, states);
- ++pos;
- }
- DoSwap(clist, nlist);
- return 0;
- }
-
- void TakeAction(State&, Action) const {}
-
- Action Next(State& st, Char letter) const
- {
- return NextTranslated(st, Translate(letter));
- }
-
-public:
- SlowCapturingScanner()
- : SlowScanner(true)
- {
- }
+ {
+ TVector<Transition> nextStates;
+ NextStates(cur, letter, nextStates);
+ for (const auto& trans : nextStates) {
+ size_t st = trans.GetState();
+ if (used[st])
+ continue;
+ used[st] = true;
+ SingleState state(st);
+ const auto& action = trans.GetAction();
+ state.SetBegin(cur.GetBegin());
+ state.SetEnd(cur.GetEnd());
+ if (action & BeginCapture && !cur.HasBegin()) {
+ state.SetBegin(pos);
+ }
+ if (action & EndCapture && !cur.HasEnd()) {
+ state.SetEnd(pos);
+ }
+ PriorityStates statesInside;
+ NextAndGetToGroups(statesInside, state, Translate(Epsilon), pos, used);
+ statesInside.Push(state, NoRepetition);
+ if (action & EndNonGreedyRepetition) {
+ auto& nongreedy = states.Get(NonGreedyRepetition);
+ InsertStates(statesInside, nongreedy, nongreedy, nongreedy);
+ }
+ else if (!(action & EndRepetition))
+ InsertStates(statesInside, states.Get(NonGreedyRepetition), states.Get(NoRepetition), states.Get(GreedyRepetition));
+ else {
+ auto& greedy = states.Get(GreedyRepetition);
+ InsertStates(statesInside, greedy, greedy, greedy);
+ }
+ }
+ }
+
+ bool Captured(const SingleState& st, bool& matched) const
+ {
+ matched = false;
+ if (IsFinal(st.GetNum())) {
+ matched = true;
+ if (st.HasBegin())
+ return true;
+ }
+ TVector<Transition> nextStates;
+ NextStates(st, Translate(EndMark), nextStates);
+ for (const auto& trans : nextStates)
+ {
+ size_t state = trans.GetState();
+ if (IsFinal(state)) {
+ matched = true;
+ if (st.HasBegin() || (trans.GetAction() & ActionsCapture))
+ return true;
+ } else { // After EndMark there can be Epsilon-transitions to the Final State
+ TVector<Transition> epsilonTrans;
+ SingleState newSt(state);
+ NextStates(newSt, Translate(Epsilon), epsilonTrans);
+ for (auto new_trans : epsilonTrans) {
+ size_t fin = new_trans.GetState();
+ if (IsFinal(fin)) {
+ matched = true;
+ if (st.HasBegin() || (trans.GetAction() & ActionsCapture))
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ bool Matched(const SingleState& st) const
+ {
+ bool matched;
+ Captured(st, matched);
+ return matched;
+ }
+
+ bool GetCapture(const State& st, SingleState& final) const
+ {
+ size_t pos = 0;
+ bool matched = false;
+ bool ans = false;
+ while (pos < st.GetSize() && !matched) {
+ ans = Captured(st.GetState(pos), matched);
+ ++pos;
+ }
+ if (matched) {
+ final = st.GetState(pos - 1);
+ return ans;
+ } else {
+ if (st.IsMatched()) {
+ final = st.GetMatched();
+ return true;
+ }
+ return false;
+ }
+ }
+
+ bool PushState(State& nlist, const SingleState& state) const
+ {
+ nlist.PushState(state);
+ if (Matched(state)) {
+ nlist.AddMatch(state);
+ return true;
+ }
+ return false;
+ }
+
+ void UpdateNList(State& nlist, const PriorityStates& states) const
+ {
+ static constexpr std::array<RepetitionTypes, 3> m_type_by_priority{{NonGreedyRepetition, NoRepetition, GreedyRepetition}};
+ for (const auto type : m_type_by_priority) {
+ for (const auto& state : states.Get(type)) {
+ if (PushState(nlist, state)) // Because we have strict priorities, after matching some state, we can be sure, that not states after will be better
+ return;
+ }
+ }
+ }
+
+ void Initialize(State& nlist) const
+ {
+ PriorityStates states;
+ SingleState init(GetStart());
+ TVector<bool> used(GetSize());
+ NextAndGetToGroups(states, init, Translate(BeginMark), 0, used);
+ NextAndGetToGroups(states, 0, Translate(BeginMark), 0, used);
+ UpdateNList(nlist, states);
+ }
+
+ Action NextTranslated(State& clist, Char letter) const
+ {
+ State nlist;
+ if (clist.IsMatched())
+ nlist.AddMatch(clist.GetMatched());
+ nlist.SetPos(clist.GetPos() + 1);
+ size_t strpos = nlist.GetPos();
+ TVector<bool> used(GetSize());
+ size_t pos = 0;
+ while (pos < clist.GetSize()) {
+ PriorityStates states;
+ NextAndGetToGroups(states, clist.GetState(pos), letter, strpos, used);
+ UpdateNList(nlist, states);
+ ++pos;
+ }
+ DoSwap(clist, nlist);
+ return 0;
+ }
+
+ void TakeAction(State&, Action) const {}
+
+ Action Next(State& st, Char letter) const
+ {
+ return NextTranslated(st, Translate(letter));
+ }
+
+public:
+ SlowCapturingScanner()
+ : SlowScanner(true)
+ {
+ }
SlowCapturingScanner(Fsm& fsm, size_t distance = 0)
: SlowScanner(fsm, true, false, distance)
- {
- }
-};
+ {
+ }
+};
namespace Features {
Feature::Ptr Capture(size_t pos);
diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp
index 984d708dfa..b3709c591f 100644
--- a/contrib/libs/pire/pire/fsm.cpp
+++ b/contrib/libs/pire/pire/fsm.cpp
@@ -837,11 +837,11 @@ bool Fsm::LettersEquality::operator()(Char a, Char b) const
return true;
}
-void Fsm::Sparse(bool needEpsilons /* = false */)
+void Fsm::Sparse(bool needEpsilons /* = false */)
{
letters = LettersTbl(LettersEquality(m_transitions));
for (unsigned letter = 0; letter < MaxChar; ++letter)
- if (letter != Epsilon || needEpsilons)
+ if (letter != Epsilon || needEpsilons)
letters.Append(letter);
m_sparsed = true;
diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h
index 4dad06ca06..275cdd2904 100644
--- a/contrib/libs/pire/pire/fsm.h
+++ b/contrib/libs/pire/pire/fsm.h
@@ -201,7 +201,7 @@ namespace Pire {
/// Builds letters equivalence classes
- void Sparse(bool needEpsilons = false);
+ void Sparse(bool needEpsilons = false);
/// Unpacks all letters equivalence classs back into transitions table
void Unsparse();
diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y
index dbad88e287..29aad9063f 100644
--- a/contrib/libs/pire/pire/re_parser.y
+++ b/contrib/libs/pire/pire/re_parser.y
@@ -139,7 +139,7 @@ iteration
cur += (orig | Fsm()) * (repc.second - repc.first);
}
}
- rlex.Parenthesized($$->As<Fsm>());
+ rlex.Parenthesized($$->As<Fsm>());
delete $1;
delete $2;
}
diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h
index f6e1ff734d..101611e566 100644
--- a/contrib/libs/pire/pire/run.h
+++ b/contrib/libs/pire/pire/run.h
@@ -290,15 +290,15 @@ std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool thr
{
typename Scanner::State st;
sc.Initialize(st);
- if (throughBeginMark)
- Pire::Step(sc, st, BeginMark);
+ if (throughBeginMark)
+ Pire::Step(sc, st, BeginMark);
const char* pos = (sc.Final(st) ? str.data() : nullptr);
Impl::DoRun(sc, st, str, Impl::LongestPrefixPred<Scanner>(pos));
- if (throughEndMark) {
- Pire::Step(sc, st, EndMark);
- if (sc.Final(st))
+ if (throughEndMark) {
+ Pire::Step(sc, st, EndMark);
+ if (sc.Final(st))
pos = str.data() + str.size();
- }
+ }
return pos ? str.substr(0, pos - str.data()) : std::string_view{};
}
@@ -316,17 +316,17 @@ std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool th
{
typename Scanner::State st;
sc.Initialize(st);
- if (throughBeginMark)
- Pire::Step(sc, st, BeginMark);
+ if (throughBeginMark)
+ Pire::Step(sc, st, BeginMark);
if (sc.Final(st))
return str.substr(0, 0);
const char* pos = nullptr;
Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos));
- if (throughEndMark) {
- Pire::Step(sc, st, EndMark);
+ if (throughEndMark) {
+ Pire::Step(sc, st, EndMark);
if (sc.Final(st) && !pos)
pos = str.data() + str.size();
- }
+ }
return pos ? str.substr(0, pos - str.data()) : std::string_view{};
}
@@ -347,8 +347,8 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s
{
typename Scanner::State state;
scanner.Initialize(state);
- if (throughEndMark)
- Step(scanner, state, EndMark);
+ if (throughEndMark)
+ Step(scanner, state, EndMark);
PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl);
PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
@@ -363,11 +363,11 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s
}
if (scanner.Final(state))
suffix = str.substr(begin - str.data());
- if (throughBeginMark) {
- Step(scanner, state, BeginMark);
- if (scanner.Final(state))
+ if (throughBeginMark) {
+ Step(scanner, state, BeginMark);
+ if (scanner.Final(state))
suffix = str.substr(begin - str.data());
- }
+ }
return suffix;
}
@@ -386,8 +386,8 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view
auto begin = str.data() + str.size();
typename Scanner::State state;
scanner.Initialize(state);
- if (throughEndMark)
- Step(scanner, state, EndMark);
+ if (throughEndMark)
+ Step(scanner, state, EndMark);
PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl);
PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
@@ -396,8 +396,8 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view
scanner.Next(state, (unsigned char)*begin);
PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl);
}
- if (throughBeginMark)
- Step(scanner, state, BeginMark);
+ if (throughBeginMark)
+ Step(scanner, state, BeginMark);
return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{};
}
diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp
index 3956e3c6ed..24751dd776 100644
--- a/contrib/libs/pire/pire/scanner_io.cpp
+++ b/contrib/libs/pire/pire/scanner_io.cpp
@@ -97,15 +97,15 @@ void SlowScanner::Save(yostream* s) const
size += sizeof(unsigned) * i.size();
}
Impl::AlignSave(s, size);
- if (need_actions) {
- size_t pos = 0;
- for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i)
- if (!i->empty()) {
- SavePodArray(s, &(*i)[0], i->size());
- pos += sizeof(Action) * i->size();
- }
- Impl::AlignSave(s, pos);
- }
+ if (need_actions) {
+ size_t pos = 0;
+ for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i)
+ if (!i->empty()) {
+ SavePodArray(s, &(*i)[0], i->size());
+ pos += sizeof(Action) * i->size();
+ }
+ Impl::AlignSave(s, pos);
+ }
}
}
@@ -118,13 +118,13 @@ void SlowScanner::Load(yistream* s)
bool empty;
LoadPodType(s, empty);
Impl::AlignLoad(s, sizeof(empty));
- sc.need_actions = need_actions;
+ sc.need_actions = need_actions;
if (empty) {
sc.Alias(Null());
} else {
sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount);
- if (sc.need_actions)
- sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount);
+ if (sc.need_actions)
+ sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount);
sc.m_vecptr = &sc.m_vec;
sc.alloc(sc.m_letters, MaxChar);
@@ -140,10 +140,10 @@ void SlowScanner::Load(yistream* s)
size_t n;
LoadPodType(s, n);
i.resize(n - c);
- if (sc.need_actions) {
- act->resize(n - c);
- ++act;
- }
+ if (sc.need_actions) {
+ act->resize(n - c);
+ ++act;
+ }
c = n;
}
Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t));
@@ -155,16 +155,16 @@ void SlowScanner::Load(yistream* s)
size += sizeof(unsigned) * i.size();
}
Impl::AlignLoad(s, size);
- size_t actSize = 0;
- if (sc.need_actions) {
+ size_t actSize = 0;
+ if (sc.need_actions) {
for (auto&& i : sc.m_actionsvec) {
if (!i.empty()) {
LoadPodArray(s, &(i)[0], i.size());
actSize += sizeof(Action) * i.size();
- }
- }
- Impl::AlignLoad(s, actSize);
- }
+ }
+ }
+ Impl::AlignLoad(s, actSize);
+ }
}
Swap(sc);
}
diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h
index 6adfcb8c1d..0417e25569 100644
--- a/contrib/libs/pire/pire/scanners/slow.h
+++ b/contrib/libs/pire/pire/scanners/slow.h
@@ -74,13 +74,13 @@ public:
#endif
};
- SlowScanner(bool needActions = false) {
- Alias(Null());
- need_actions = needActions;
- }
-
- size_t GetLettersCount() const {return m.lettersCount; };
+ SlowScanner(bool needActions = false) {
+ Alias(Null());
+ need_actions = needActions;
+ }
+ size_t GetLettersCount() const {return m.lettersCount; };
+
size_t Size() const { return GetSize(); }
size_t GetSize() const { return m.statesCount; }
bool Empty() const { return m_finals == Null().m_finals; }
@@ -194,8 +194,8 @@ public:
Impl::MapPtr(s.m_finals, s.m.statesCount, p, size);
Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size);
Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
- if (need_actions)
- Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
+ if (need_actions)
+ Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size);
Swap(s);
}
return (const void*) p;
@@ -205,7 +205,7 @@ public:
{
DoSwap(m_finals, s.m_finals);
DoSwap(m_jumps, s.m_jumps);
- DoSwap(m_actions, s.m_actions);
+ DoSwap(m_actions, s.m_actions);
DoSwap(m_jumpPos, s.m_jumpPos);
DoSwap(m.statesCount, s.m.statesCount);
DoSwap(m.lettersCount, s.m.lettersCount);
@@ -215,8 +215,8 @@ public:
DoSwap(m_vec, s.m_vec);
DoSwap(m_vecptr, s.m_vecptr);
- DoSwap(need_actions, s.need_actions);
- DoSwap(m_actionsvec, s.m_actionsvec);
+ DoSwap(need_actions, s.need_actions);
+ DoSwap(m_actionsvec, s.m_actionsvec);
if (m_vecptr == &s.m_vec)
m_vecptr = &m_vec;
if (s.m_vecptr == &m_vec)
@@ -226,14 +226,14 @@ public:
SlowScanner(const SlowScanner& s)
: m(s.m)
, m_vec(s.m_vec)
- , need_actions(s.need_actions)
- , m_actionsvec(s.m_actionsvec)
+ , need_actions(s.need_actions)
+ , m_actionsvec(s.m_actionsvec)
{
if (s.m_vec.empty()) {
// Empty or mmap()-ed scanner, just copy pointers
m_finals = s.m_finals;
m_jumps = s.m_jumps;
- m_actions = s.m_actions;
+ m_actions = s.m_actions;
m_jumpPos = s.m_jumpPos;
m_letters = s.m_letters;
m_vecptr = 0;
@@ -243,7 +243,7 @@ public:
memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar);
m_jumps = 0;
m_jumpPos = 0;
- m_actions = 0;
+ m_actions = 0;
alloc(m_finals, m.statesCount);
memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount);
m_vecptr = &m_vec;
@@ -251,25 +251,25 @@ public:
}
explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0)
- : need_actions(needActions)
+ : need_actions(needActions)
{
if (distance) {
fsm = CreateApproxFsm(fsm, distance);
}
- if (removeEpsilons)
- fsm.RemoveEpsilons();
- fsm.Sparse(!removeEpsilons);
+ if (removeEpsilons)
+ fsm.RemoveEpsilons();
+ fsm.Sparse(!removeEpsilons);
m.statesCount = fsm.Size();
m.lettersCount = fsm.Letters().Size();
m_vec.resize(m.statesCount * m.lettersCount);
- if (need_actions)
- m_actionsvec.resize(m.statesCount * m.lettersCount);
+ if (need_actions)
+ m_actionsvec.resize(m.statesCount * m.lettersCount);
m_vecptr = &m_vec;
alloc(m_letters, MaxChar);
m_jumps = 0;
- m_actions = 0;
+ m_actions = 0;
m_jumpPos = 0;
alloc(m_finals, m.statesCount);
@@ -297,47 +297,47 @@ public:
const State& StateIndex(const State& s) const { return s; }
-protected:
- bool IsMmaped() const
- {
- return (!m_vecptr);
- }
-
- size_t GetJump(size_t pos) const
- {
- return m_jumps[pos];
- }
-
- Action& GetAction(size_t pos) const
- {
- return m_actions[pos];
- }
-
- const TVector<Action>& GetActionsVec(size_t from) const
- {
- return m_actionsvec[from];
- }
-
- const TVector<unsigned int>& GetJumpsVec(size_t from) const
- {
- return m_vec[from];
- }
-
- size_t* GetJumpPos() const
- {
- return m_jumpPos;
- }
-
- size_t GetStart() const
- {
- return m.start;
- }
-
- bool IsFinal(size_t pos) const
- {
- return m_finals[pos];
- }
-
+protected:
+ bool IsMmaped() const
+ {
+ return (!m_vecptr);
+ }
+
+ size_t GetJump(size_t pos) const
+ {
+ return m_jumps[pos];
+ }
+
+ Action& GetAction(size_t pos) const
+ {
+ return m_actions[pos];
+ }
+
+ const TVector<Action>& GetActionsVec(size_t from) const
+ {
+ return m_actionsvec[from];
+ }
+
+ const TVector<unsigned int>& GetJumpsVec(size_t from) const
+ {
+ return m_vec[from];
+ }
+
+ size_t* GetJumpPos() const
+ {
+ return m_jumpPos;
+ }
+
+ size_t GetStart() const
+ {
+ return m.start;
+ }
+
+ bool IsFinal(size_t pos) const
+ {
+ return m_finals[pos];
+ }
+
private:
struct Locals {
@@ -348,15 +348,15 @@ private:
bool* m_finals;
unsigned* m_jumps;
- Action* m_actions;
+ Action* m_actions;
size_t* m_jumpPos;
size_t* m_letters;
TVector<void*> m_pool;
TVector< TVector<unsigned> > m_vec, *m_vecptr;
- bool need_actions;
- TVector<TVector<Action>> m_actionsvec;
+ bool need_actions;
+ TVector<TVector<Action>> m_actionsvec;
static const SlowScanner& Null();
template<class T> void alloc(T*& p, size_t size)
@@ -370,18 +370,18 @@ private:
{
memcpy(&m, &s.m, sizeof(m));
m_vec.clear();
- need_actions = s.need_actions;
- m_actionsvec.clear();
+ need_actions = s.need_actions;
+ m_actionsvec.clear();
m_finals = s.m_finals;
m_jumps = s.m_jumps;
- m_actions = s.m_actions;
+ m_actions = s.m_actions;
m_jumpPos = s.m_jumpPos;
m_letters = s.m_letters;
m_vecptr = s.m_vecptr;
m_pool.clear();
}
- void SetJump(size_t oldState, Char c, size_t newState, unsigned long action)
+ void SetJump(size_t oldState, Char c, size_t newState, unsigned long action)
{
Y_ASSERT(!m_vec.empty());
Y_ASSERT(oldState < m.statesCount);
@@ -389,8 +389,8 @@ private:
size_t idx = oldState * m.lettersCount + m_letters[c];
m_vec[idx].push_back(newState);
- if (need_actions)
- m_actionsvec[idx].push_back(action);
+ if (need_actions)
+ m_actionsvec[idx].push_back(action);
}
unsigned long RemapAction(unsigned long action) { return action; }
diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h
index 94bba4064b..92ce16e14f 100644
--- a/library/cpp/regex/pire/regexp.h
+++ b/library/cpp/regex/pire/regexp.h
@@ -48,7 +48,7 @@ namespace NRegExp {
};
static inline NPire::TFsm Parse(const TStringBuf& regexp,
- const TOptions& opts, const bool needDetermine = true) {
+ const TOptions& opts, const bool needDetermine = true) {
NPire::TLexer lexer;
if (opts.Charset == CODES_UNKNOWN) {
lexer.Assign(regexp.data(), regexp.data() + regexp.size());
@@ -98,9 +98,9 @@ namespace NRegExp {
ret.Surround();
}
- if (needDetermine) {
- ret.Determine();
- }
+ if (needDetermine) {
+ ret.Determine();
+ }
return ret;
}
@@ -113,8 +113,8 @@ namespace NRegExp {
public:
inline explicit TFsmParser(const TStringBuf& regexp,
- const TOptions& opts = TOptions(), bool needDetermine = true)
- : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
+ const TOptions& opts = TOptions(), bool needDetermine = true)
+ : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>())
{
}
@@ -177,19 +177,19 @@ namespace NRegExp {
}
};
- struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
+ struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> {
inline explicit TSlowCapturingFsm(const TStringBuf& regexp,
- TOptions opts = TOptions())
- : TFsmParser<TScanner>(regexp,
- opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) {
- }
-
- inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm)
- : TFsmParser<TScanner>(fsm)
- {
- }
- };
-
+ TOptions opts = TOptions())
+ : TFsmParser<TScanner>(regexp,
+ opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) {
+ }
+
+ inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm)
+ : TFsmParser<TScanner>(fsm)
+ {
+ }
+ };
+
template <class TFsm>
class TMatcherBase {
public:
@@ -285,53 +285,53 @@ namespace NRegExp {
private:
TStringBuf Data;
};
-
- class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{
- public:
- typedef typename TSlowCapturingFsm::TScanner::State TState;
- inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm)
- : TMatcherBase<TSlowCapturingFsm>(fsm)
- , HasCaptured(false)
- {
- }
-
- inline bool Captured() const noexcept {
- return HasCaptured;
- }
-
- inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
- TStringBuf textData(data, len);
- Data = textData;
+
+ class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{
+ public:
+ typedef typename TSlowCapturingFsm::TScanner::State TState;
+ inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm)
+ : TMatcherBase<TSlowCapturingFsm>(fsm)
+ , HasCaptured(false)
+ {
+ }
+
+ inline bool Captured() const noexcept {
+ return HasCaptured;
+ }
+
+ inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept {
+ TStringBuf textData(data, len);
+ Data = textData;
Run(Data.begin(), Data.size(), addBegin, addEnd);
- return GetAns();
- }
-
+ return GetAns();
+ }
+
inline TSlowSearcher& Search(const TStringBuf& s) noexcept {
return Search(s.data(), s.size());
- }
-
- inline TStringBuf GetCaptured() const noexcept {
- return Ans;
- }
-
- private:
- TStringBuf Data;
- TStringBuf Ans;
- bool HasCaptured;
-
- inline TSlowSearcher& GetAns() {
- auto state = GetState();
- Pire::SlowCapturingScanner::SingleState final;
- if (!GetScanner().GetCapture(state, final)) {
- HasCaptured = false;
- } else {
- if (!final.HasEnd()) {
- final.SetEnd(Data.size());
- }
- Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin());
- HasCaptured = true;
- }
- return *this;
- }
- };
+ }
+
+ inline TStringBuf GetCaptured() const noexcept {
+ return Ans;
+ }
+
+ private:
+ TStringBuf Data;
+ TStringBuf Ans;
+ bool HasCaptured;
+
+ inline TSlowSearcher& GetAns() {
+ auto state = GetState();
+ Pire::SlowCapturingScanner::SingleState final;
+ if (!GetScanner().GetCapture(state, final)) {
+ HasCaptured = false;
+ } else {
+ if (!final.HasEnd()) {
+ final.SetEnd(Data.size());
+ }
+ Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin());
+ HasCaptured = true;
+ }
+ return *this;
+ }
+ };
}
diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp
index e7206de9ad..19694cfe59 100644
--- a/library/cpp/regex/pire/ut/regexp_ut.cpp
+++ b/library/cpp/regex/pire/ut/regexp_ut.cpp
@@ -216,95 +216,95 @@ Y_UNIT_TEST_SUITE(TRegExp) {
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("xx"));
}
- Y_UNIT_TEST(SlowCapture) {
- TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
- TFsm::TOptions().SetCapture(2));
- TSlowSearcher searcher(fsm);
- searcher.Search("http://vkontakte.ru/id100500");
- UNIT_ASSERT(searcher.Captured());
+ Y_UNIT_TEST(SlowCapture) {
+ TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)",
+ TFsm::TOptions().SetCapture(2));
+ TSlowSearcher searcher(fsm);
+ searcher.Search("http://vkontakte.ru/id100500");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("100500"));
- }
-
- Y_UNIT_TEST(SlowCaptureGreedy) {
- TSlowCapturingFsm fsm(".*(pref.*suff)");
- TSlowSearcher searcher(fsm);
- searcher.Search("pref ala bla pref cla suff dla");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCaptureGreedy) {
+ TSlowCapturingFsm fsm(".*(pref.*suff)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("pref ala bla pref cla suff dla");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref cla suff"));
- }
-
- Y_UNIT_TEST(SlowCaptureNonGreedy) {
- TSlowCapturingFsm fsm(".*?(pref.*suff)");
- TSlowSearcher searcher(fsm);
- searcher.Search("pref ala bla pref cla suff dla");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCaptureNonGreedy) {
+ TSlowCapturingFsm fsm(".*?(pref.*suff)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("pref ala bla pref cla suff dla");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref ala bla pref cla suff"));
- }
-
- Y_UNIT_TEST(SlowCapture2) {
- TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
- TFsm::TOptions().SetCharset(CODES_UTF8));
-
- TSlowSearcher searcher(fsm);
- searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture2) {
+ TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!",
+ TFsm::TOptions().SetCharset(CODES_UTF8));
+
+ TSlowSearcher searcher(fsm);
+ searcher.Search(" Здравствуйте, Уважаемый (-ая)! ");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)"));
- }
-
- Y_UNIT_TEST(SlowCapture3) {
- TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);");
- TSlowSearcher searcher(fsm);
- searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture3) {
+ TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("0x0d0a"));
- }
-
- Y_UNIT_TEST(SlowCapture4) {
- TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\"");
- TSlowSearcher searcher(fsm);
- searcher.Search("\"/away.php?to=http:some.addr\"&id=1");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(SlowCapture4) {
+ TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\"");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("\"/away.php?to=http:some.addr\"&id=1");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("some.addr"));
- }
-
- Y_UNIT_TEST(CapturedEmptySlow) {
- TSlowCapturingFsm fsm("Comments=(.*)$");
- TSlowSearcher searcher(fsm);
- searcher.Search("And Comments=");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CapturedEmptySlow) {
+ TSlowCapturingFsm fsm("Comments=(.*)$");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("And Comments=");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf(""));
- }
-
- Y_UNIT_TEST(CaptureInOrFirst) {
- TSlowCapturingFsm fsm("(A)|A");
- TSlowSearcher searcher(fsm);
- searcher.Search("A");
- UNIT_ASSERT(searcher.Captured());
- }
-
- Y_UNIT_TEST(CaptureInOrSecond) {
- TSlowCapturingFsm fsm("A|(A)");
- TSlowSearcher searcher(fsm);
- searcher.Search("A");
- UNIT_ASSERT(!searcher.Captured());
- }
-
- Y_UNIT_TEST(CaptureOutside) {
- TSlowCapturingFsm fsm("((ID=([0-9]+))?)");
- TSlowSearcher searcher(fsm);
- searcher.Search("ID=");
- UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureInOrFirst) {
+ TSlowCapturingFsm fsm("(A)|A");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("A");
+ UNIT_ASSERT(searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureInOrSecond) {
+ TSlowCapturingFsm fsm("A|(A)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("A");
+ UNIT_ASSERT(!searcher.Captured());
+ }
+
+ Y_UNIT_TEST(CaptureOutside) {
+ TSlowCapturingFsm fsm("((ID=([0-9]+))?)");
+ TSlowSearcher searcher(fsm);
+ searcher.Search("ID=");
+ UNIT_ASSERT(searcher.Captured());
UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf(""));
- }
-
- Y_UNIT_TEST(CaptureInside) {
- TSlowCapturingFsm fsm("((ID=([0-9]+))?)",
- TFsm::TOptions().SetCapture(2));
- TSlowSearcher searcher(fsm);
- searcher.Search("ID=");
- UNIT_ASSERT(!searcher.Captured());
- }
-
+ }
+
+ Y_UNIT_TEST(CaptureInside) {
+ TSlowCapturingFsm fsm("((ID=([0-9]+))?)",
+ TFsm::TOptions().SetCapture(2));
+ TSlowSearcher searcher(fsm);
+ searcher.Search("ID=");
+ UNIT_ASSERT(!searcher.Captured());
+ }
+
Y_UNIT_TEST(Pcre2PireTest) {
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)");
UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?");