diff options
author | iblinnikov <iblinnikov@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
commit | b420f761873190614f41ed39c6d96bd3dc14fd00 (patch) | |
tree | 88f25d43498bcac1aa20e80ca2979e17a9f95018 | |
parent | 1916d87e4a1be8b60140240d49f0572a22e54bf8 (diff) | |
download | ydb-b420f761873190614f41ed39c6d96bd3dc14fd00.tar.gz |
Restoring authorship annotation for <iblinnikov@yandex-team.ru>. Commit 1 of 2.
-rw-r--r-- | contrib/libs/pire/pire/extra/capture.cpp | 94 | ||||
-rw-r--r-- | contrib/libs/pire/pire/extra/capture.h | 810 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.cpp | 4 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.h | 2 | ||||
-rw-r--r-- | contrib/libs/pire/pire/re_parser.y | 2 | ||||
-rw-r--r-- | contrib/libs/pire/pire/run.h | 42 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanner_io.cpp | 44 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/slow.h | 144 | ||||
-rw-r--r-- | library/cpp/regex/pire/regexp.h | 128 | ||||
-rw-r--r-- | library/cpp/regex/pire/ut/regexp_ut.cpp | 162 |
10 files changed, 716 insertions, 716 deletions
diff --git a/contrib/libs/pire/pire/extra/capture.cpp b/contrib/libs/pire/pire/extra/capture.cpp index fb4cdf6d81..a3e663af83 100644 --- a/contrib/libs/pire/pire/extra/capture.cpp +++ b/contrib/libs/pire/pire/extra/capture.cpp @@ -34,25 +34,25 @@ namespace { : State(0) , Pos(pos) , Level(0) - , StateRepetition(NoRepetition) + , StateRepetition(NoRepetition) {} - bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; } + bool Accepts(wchar32 c) const { return c == '(' || c == '+' || c == '*' || c == '?' || c == '{'; } Term Lex() { - wchar32 c = GetChar(); - if (!Accepts(c)) + wchar32 c = GetChar(); + if (!Accepts(c)) Error("How did we get here?!.."); - if (c != '(') { - wchar32 next = PeekChar(); - if (next == '?') { - StateRepetition = NonGreedyRepetition; - GetChar(); - } - else - StateRepetition = GreedyRepetition; - } - else if (State == 0 && Pos > 1) + if (c != '(') { + wchar32 next = PeekChar(); + if (next == '?') { + StateRepetition = NonGreedyRepetition; + GetChar(); + } + else + StateRepetition = GreedyRepetition; + } + else if (State == 0 && Pos > 1) --Pos; else if (State == 0 && Pos == 1) { State = 1; @@ -60,27 +60,27 @@ namespace { } else if (State == 1) { ++Level; } - if (c == '(') - return Term(TokenTypes::Open); - else if (c == '+') - return Term::Repetition(1, Inf); - else if (c == '*') - return Term::Repetition(0, Inf); - else if (c == '?') - return Term::Repetition(0, 1); - else { - UngetChar(c); - return Term(0); - } + if (c == '(') + return Term(TokenTypes::Open); + else if (c == '+') + return Term::Repetition(1, Inf); + else if (c == '*') + return Term::Repetition(0, Inf); + else if (c == '?') + return Term::Repetition(0, 1); + else { + UngetChar(c); + return Term(0); + } } void Parenthesized(Fsm& fsm) { - if (StateRepetition != NoRepetition) { - bool greedy = (StateRepetition == GreedyRepetition); - SetRepetitionMark(fsm, greedy); - StateRepetition = NoRepetition; - } else if (State == 1 && Level == 0) { + if (StateRepetition != NoRepetition) { + bool greedy = (StateRepetition == GreedyRepetition); + SetRepetitionMark(fsm, greedy); + StateRepetition = NoRepetition; + } else if (State == 1 && Level == 0) { SetCaptureMark(fsm); State = 2; } else if (State == 1 && Level > 0) @@ -90,23 +90,23 @@ namespace { unsigned State; size_t Pos; size_t Level; - RepetitionTypes StateRepetition; - - void SetRepetitionMark(Fsm& fsm, bool greedy) - { + RepetitionTypes StateRepetition; + + void SetRepetitionMark(Fsm& fsm, bool greedy) + { fsm.Resize(fsm.Size() + 1); - fsm.ConnectFinal(fsm.Size() - 1); - - for (size_t state = 0; state < fsm.Size() - 1; ++state) - if (fsm.IsFinal(state)) - if (greedy) - fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndRepetition); - else - fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndNonGreedyRepetition); - fsm.ClearFinal(); - fsm.SetFinal(fsm.Size() - 1, true); - fsm.SetIsDetermined(false); - } + fsm.ConnectFinal(fsm.Size() - 1); + + for (size_t state = 0; state < fsm.Size() - 1; ++state) + if (fsm.IsFinal(state)) + if (greedy) + fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndRepetition); + else + fsm.SetOutput(state, fsm.Size() - 1, SlowCapturingScanner::EndNonGreedyRepetition); + fsm.ClearFinal(); + fsm.SetFinal(fsm.Size() - 1, true); + fsm.SetIsDetermined(false); + } void SetCaptureMark(Fsm& fsm) { diff --git a/contrib/libs/pire/pire/extra/capture.h b/contrib/libs/pire/pire/extra/capture.h index 8399914a67..9951de166a 100644 --- a/contrib/libs/pire/pire/extra/capture.h +++ b/contrib/libs/pire/pire/extra/capture.h @@ -27,14 +27,14 @@ #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/scanners/loaded.h> -#include <contrib/libs/pire/pire/scanners/multi.h> -#include <contrib/libs/pire/pire/scanners/slow.h> +#include <contrib/libs/pire/pire/scanners/multi.h> +#include <contrib/libs/pire/pire/scanners/slow.h> #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/re_lexer.h> -#include <contrib/libs/pire/pire/run.h> - -#include <array> +#include <contrib/libs/pire/pire/run.h> +#include <array> + namespace Pire { /** @@ -158,67 +158,67 @@ private: friend void BuildScanner<CapturingScanner>(const Fsm&, CapturingScanner&); }; - -enum RepetitionTypes { // They are sorted by their priorities - NonGreedyRepetition, - NoRepetition, - GreedyRepetition, -}; - -/** -* A Slowcapturing scanner. -* Does not require FSM to be deterministic, uses O(|text| * |regexp|) time, but always returns the correct capture and supports both greedy and non-greedy quantifiers. -*/ -class SlowCapturingScanner : public SlowScanner { -public: - enum { - Nothing = 0, - BeginCapture = 1, - EndCapture = 2, - EndRepetition = 4, - EndNonGreedyRepetition = 8, - - FinalFlag = 1, - }; - - const ui32 ActionsCapture = BeginCapture | EndCapture; - - class SingleState { - public: - bool Captured() const - { - return (m_begin != m_npos && m_end != m_npos); - } - - bool HasBegin() const - { - return (m_begin != m_npos); - } - - bool HasEnd() const - { - return (m_end != m_npos); - } - - SingleState(size_t num = 0) - { - m_state = num; - m_begin = m_npos; - m_end = m_npos; - } - - void SetBegin(size_t pos) - { - if (m_begin == m_npos) - m_begin = pos; - } - - void SetEnd(size_t pos) - { - if (m_end == m_npos) - m_end = pos; - } - + +enum RepetitionTypes { // They are sorted by their priorities + NonGreedyRepetition, + NoRepetition, + GreedyRepetition, +}; + +/** +* A Slowcapturing scanner. +* Does not require FSM to be deterministic, uses O(|text| * |regexp|) time, but always returns the correct capture and supports both greedy and non-greedy quantifiers. +*/ +class SlowCapturingScanner : public SlowScanner { +public: + enum { + Nothing = 0, + BeginCapture = 1, + EndCapture = 2, + EndRepetition = 4, + EndNonGreedyRepetition = 8, + + FinalFlag = 1, + }; + + const ui32 ActionsCapture = BeginCapture | EndCapture; + + class SingleState { + public: + bool Captured() const + { + return (m_begin != m_npos && m_end != m_npos); + } + + bool HasBegin() const + { + return (m_begin != m_npos); + } + + bool HasEnd() const + { + return (m_end != m_npos); + } + + SingleState(size_t num = 0) + { + m_state = num; + m_begin = m_npos; + m_end = m_npos; + } + + void SetBegin(size_t pos) + { + if (m_begin == m_npos) + m_begin = pos; + } + + void SetEnd(size_t pos) + { + if (m_end == m_npos) + m_end = pos; + } + size_t Begin() const { return GetBegin(); @@ -229,358 +229,358 @@ public: return GetEnd(); } - size_t GetBegin() const - { - return m_begin; - } - - size_t GetEnd() const - { - return m_end; - } - - size_t GetNum() const - { - return m_state; - } - - private: - size_t m_state; - size_t m_begin; - size_t m_end; - static const size_t m_npos = static_cast<size_t>(-1); - }; - - class State { - public: - State() + size_t GetBegin() const + { + return m_begin; + } + + size_t GetEnd() const + { + return m_end; + } + + size_t GetNum() const + { + return m_state; + } + + private: + size_t m_state; + size_t m_begin; + size_t m_end; + static const size_t m_npos = static_cast<size_t>(-1); + }; + + class State { + public: + State() : m_strpos(0) , m_matched(false) {} - - size_t GetPos() const - { - return m_strpos; - } - - const SingleState& GetState(size_t pos) const - { - return m_states[pos]; - } - - void SetPos(size_t newPos) - { - m_strpos = newPos; - } - - void PushState(SingleState& st) - { - m_states.push_back(st); - } - - void PushState(const SingleState& st) - { - m_states.push_back(st); - } - - size_t GetSize() const - { - return m_states.size(); - } - - const TVector<SingleState>& GetStates() const - { - return m_states; - } - - bool IsMatched() const - { - return m_matched; - } - - const SingleState& GetMatched() const - { - return m_match; - } - - void AddMatch(const SingleState& Matched) - { - m_matched = true; - m_match = Matched; - } - - private: - TVector<SingleState> m_states; - size_t m_strpos; - bool m_matched; - SingleState m_match; - }; - - class Transition { - private: - unsigned long m_stateto; - Action m_action; - - public: - unsigned long GetState() const - { - return m_stateto; - } - - Action GetAction() const - { - return m_action; - } - - Transition(unsigned long state, Action act = 0) + + size_t GetPos() const + { + return m_strpos; + } + + const SingleState& GetState(size_t pos) const + { + return m_states[pos]; + } + + void SetPos(size_t newPos) + { + m_strpos = newPos; + } + + void PushState(SingleState& st) + { + m_states.push_back(st); + } + + void PushState(const SingleState& st) + { + m_states.push_back(st); + } + + size_t GetSize() const + { + return m_states.size(); + } + + const TVector<SingleState>& GetStates() const + { + return m_states; + } + + bool IsMatched() const + { + return m_matched; + } + + const SingleState& GetMatched() const + { + return m_match; + } + + void AddMatch(const SingleState& Matched) + { + m_matched = true; + m_match = Matched; + } + + private: + TVector<SingleState> m_states; + size_t m_strpos; + bool m_matched; + SingleState m_match; + }; + + class Transition { + private: + unsigned long m_stateto; + Action m_action; + + public: + unsigned long GetState() const + { + return m_stateto; + } + + Action GetAction() const + { + return m_action; + } + + Transition(unsigned long state, Action act = 0) : m_stateto(state) , m_action(act) - { - } - }; - - class PriorityStates { - private: - TVector<SingleState> m_nonGreedy; - TVector<SingleState> m_nothing; - TVector<SingleState> m_greedy; - - public: - void Push(const SingleState& st, RepetitionTypes repetition) - { - Get(repetition).push_back(st); - } - - TVector<SingleState>& Get(RepetitionTypes repetition) - { - switch (repetition) { - case NonGreedyRepetition: - return m_nonGreedy; - case NoRepetition: - return m_nothing; - case GreedyRepetition: - return m_greedy; - } - } - - const TVector<SingleState>& Get(RepetitionTypes repetition) const - { - switch (repetition) { - case NonGreedyRepetition: - return m_nonGreedy; - case NoRepetition: - return m_nothing; - case GreedyRepetition: - return m_greedy; - } - } - }; - - SlowScanner::State GetNextStates(const SingleState& cur, Char letter) const - { + { + } + }; + + class PriorityStates { + private: + TVector<SingleState> m_nonGreedy; + TVector<SingleState> m_nothing; + TVector<SingleState> m_greedy; + + public: + void Push(const SingleState& st, RepetitionTypes repetition) + { + Get(repetition).push_back(st); + } + + TVector<SingleState>& Get(RepetitionTypes repetition) + { + switch (repetition) { + case NonGreedyRepetition: + return m_nonGreedy; + case NoRepetition: + return m_nothing; + case GreedyRepetition: + return m_greedy; + } + } + + const TVector<SingleState>& Get(RepetitionTypes repetition) const + { + switch (repetition) { + case NonGreedyRepetition: + return m_nonGreedy; + case NoRepetition: + return m_nothing; + case GreedyRepetition: + return m_greedy; + } + } + }; + + SlowScanner::State GetNextStates(const SingleState& cur, Char letter) const + { SlowScanner::State st(GetSize()); st.states.push_back(cur.GetNum()); SlowScanner::State nextState(GetSize()); SlowScanner::NextTranslated(st, nextState, letter); return nextState; - } - - size_t GetPosition(const SingleState& state, Char letter) const - { - return state.GetNum() * GetLettersCount() + letter; - } - - void NextStates(const SingleState& state, Char letter, TVector<Transition>& nextStates) const - { - if (IsMmaped()) { - const size_t* pos = GetJumpPos() + GetPosition(state, letter); - size_t posBegin = pos[0]; - size_t posEnd = pos[1]; - for (size_t i = posBegin; i < posEnd; ++i) - nextStates.emplace_back(GetJump(i), GetAction(i)); - } else { - size_t num = GetPosition(state, letter); - const auto& jumpVec = GetJumpsVec(num); - const auto& actionVec = GetActionsVec(num); - for (size_t i = 0; i < jumpVec.size(); ++i) - nextStates.emplace_back(jumpVec[i], actionVec[i]); - } - } - - void InsertStates(const PriorityStates& states, TVector<SingleState>& nonGreedy, TVector<SingleState>& nothing, TVector<SingleState>& greedy) const - { - for (auto& greed : {ymake_pair(&nonGreedy, NonGreedyRepetition), ymake_pair(¬hing, NoRepetition), ymake_pair(&greedy, GreedyRepetition)}) { - auto& vec = greed.first; - auto& tag = greed.second; - vec->insert(vec->end(), states.Get(tag).begin(), states.Get(tag).end()); - } - } - - void NextAndGetToGroups(PriorityStates& states, const SingleState& cur, + } + + size_t GetPosition(const SingleState& state, Char letter) const + { + return state.GetNum() * GetLettersCount() + letter; + } + + void NextStates(const SingleState& state, Char letter, TVector<Transition>& nextStates) const + { + if (IsMmaped()) { + const size_t* pos = GetJumpPos() + GetPosition(state, letter); + size_t posBegin = pos[0]; + size_t posEnd = pos[1]; + for (size_t i = posBegin; i < posEnd; ++i) + nextStates.emplace_back(GetJump(i), GetAction(i)); + } else { + size_t num = GetPosition(state, letter); + const auto& jumpVec = GetJumpsVec(num); + const auto& actionVec = GetActionsVec(num); + for (size_t i = 0; i < jumpVec.size(); ++i) + nextStates.emplace_back(jumpVec[i], actionVec[i]); + } + } + + void InsertStates(const PriorityStates& states, TVector<SingleState>& nonGreedy, TVector<SingleState>& nothing, TVector<SingleState>& greedy) const + { + for (auto& greed : {ymake_pair(&nonGreedy, NonGreedyRepetition), ymake_pair(¬hing, NoRepetition), ymake_pair(&greedy, GreedyRepetition)}) { + auto& vec = greed.first; + auto& tag = greed.second; + vec->insert(vec->end(), states.Get(tag).begin(), states.Get(tag).end()); + } + } + + void NextAndGetToGroups(PriorityStates& states, const SingleState& cur, Char letter, size_t pos, TVector<bool>& used) const - { - TVector<Transition> nextStates; - NextStates(cur, letter, nextStates); - for (const auto& trans : nextStates) { - size_t st = trans.GetState(); - if (used[st]) - continue; - used[st] = true; - SingleState state(st); - const auto& action = trans.GetAction(); - state.SetBegin(cur.GetBegin()); - state.SetEnd(cur.GetEnd()); - if (action & BeginCapture && !cur.HasBegin()) { - state.SetBegin(pos); - } - if (action & EndCapture && !cur.HasEnd()) { - state.SetEnd(pos); - } - PriorityStates statesInside; - NextAndGetToGroups(statesInside, state, Translate(Epsilon), pos, used); - statesInside.Push(state, NoRepetition); - if (action & EndNonGreedyRepetition) { - auto& nongreedy = states.Get(NonGreedyRepetition); - InsertStates(statesInside, nongreedy, nongreedy, nongreedy); - } - else if (!(action & EndRepetition)) - InsertStates(statesInside, states.Get(NonGreedyRepetition), states.Get(NoRepetition), states.Get(GreedyRepetition)); - else { - auto& greedy = states.Get(GreedyRepetition); - InsertStates(statesInside, greedy, greedy, greedy); - } - } - } - - bool Captured(const SingleState& st, bool& matched) const - { - matched = false; - if (IsFinal(st.GetNum())) { - matched = true; - if (st.HasBegin()) - return true; - } - TVector<Transition> nextStates; - NextStates(st, Translate(EndMark), nextStates); - for (const auto& trans : nextStates) - { - size_t state = trans.GetState(); - if (IsFinal(state)) { - matched = true; - if (st.HasBegin() || (trans.GetAction() & ActionsCapture)) - return true; - } else { // After EndMark there can be Epsilon-transitions to the Final State - TVector<Transition> epsilonTrans; - SingleState newSt(state); - NextStates(newSt, Translate(Epsilon), epsilonTrans); - for (auto new_trans : epsilonTrans) { - size_t fin = new_trans.GetState(); - if (IsFinal(fin)) { - matched = true; - if (st.HasBegin() || (trans.GetAction() & ActionsCapture)) - return true; - } - } - } - } - return false; - } - - bool Matched(const SingleState& st) const - { - bool matched; - Captured(st, matched); - return matched; - } - - bool GetCapture(const State& st, SingleState& final) const - { - size_t pos = 0; - bool matched = false; - bool ans = false; - while (pos < st.GetSize() && !matched) { - ans = Captured(st.GetState(pos), matched); - ++pos; - } - if (matched) { - final = st.GetState(pos - 1); - return ans; - } else { - if (st.IsMatched()) { - final = st.GetMatched(); - return true; - } - return false; - } - } - - bool PushState(State& nlist, const SingleState& state) const - { - nlist.PushState(state); - if (Matched(state)) { - nlist.AddMatch(state); - return true; - } - return false; - } - - void UpdateNList(State& nlist, const PriorityStates& states) const - { - static constexpr std::array<RepetitionTypes, 3> m_type_by_priority{{NonGreedyRepetition, NoRepetition, GreedyRepetition}}; - for (const auto type : m_type_by_priority) { - for (const auto& state : states.Get(type)) { - if (PushState(nlist, state)) // Because we have strict priorities, after matching some state, we can be sure, that not states after will be better - return; - } - } - } - - void Initialize(State& nlist) const - { - PriorityStates states; - SingleState init(GetStart()); - TVector<bool> used(GetSize()); - NextAndGetToGroups(states, init, Translate(BeginMark), 0, used); - NextAndGetToGroups(states, 0, Translate(BeginMark), 0, used); - UpdateNList(nlist, states); - } - - Action NextTranslated(State& clist, Char letter) const - { - State nlist; - if (clist.IsMatched()) - nlist.AddMatch(clist.GetMatched()); - nlist.SetPos(clist.GetPos() + 1); - size_t strpos = nlist.GetPos(); - TVector<bool> used(GetSize()); - size_t pos = 0; - while (pos < clist.GetSize()) { - PriorityStates states; - NextAndGetToGroups(states, clist.GetState(pos), letter, strpos, used); - UpdateNList(nlist, states); - ++pos; - } - DoSwap(clist, nlist); - return 0; - } - - void TakeAction(State&, Action) const {} - - Action Next(State& st, Char letter) const - { - return NextTranslated(st, Translate(letter)); - } - -public: - SlowCapturingScanner() - : SlowScanner(true) - { - } + { + TVector<Transition> nextStates; + NextStates(cur, letter, nextStates); + for (const auto& trans : nextStates) { + size_t st = trans.GetState(); + if (used[st]) + continue; + used[st] = true; + SingleState state(st); + const auto& action = trans.GetAction(); + state.SetBegin(cur.GetBegin()); + state.SetEnd(cur.GetEnd()); + if (action & BeginCapture && !cur.HasBegin()) { + state.SetBegin(pos); + } + if (action & EndCapture && !cur.HasEnd()) { + state.SetEnd(pos); + } + PriorityStates statesInside; + NextAndGetToGroups(statesInside, state, Translate(Epsilon), pos, used); + statesInside.Push(state, NoRepetition); + if (action & EndNonGreedyRepetition) { + auto& nongreedy = states.Get(NonGreedyRepetition); + InsertStates(statesInside, nongreedy, nongreedy, nongreedy); + } + else if (!(action & EndRepetition)) + InsertStates(statesInside, states.Get(NonGreedyRepetition), states.Get(NoRepetition), states.Get(GreedyRepetition)); + else { + auto& greedy = states.Get(GreedyRepetition); + InsertStates(statesInside, greedy, greedy, greedy); + } + } + } + + bool Captured(const SingleState& st, bool& matched) const + { + matched = false; + if (IsFinal(st.GetNum())) { + matched = true; + if (st.HasBegin()) + return true; + } + TVector<Transition> nextStates; + NextStates(st, Translate(EndMark), nextStates); + for (const auto& trans : nextStates) + { + size_t state = trans.GetState(); + if (IsFinal(state)) { + matched = true; + if (st.HasBegin() || (trans.GetAction() & ActionsCapture)) + return true; + } else { // After EndMark there can be Epsilon-transitions to the Final State + TVector<Transition> epsilonTrans; + SingleState newSt(state); + NextStates(newSt, Translate(Epsilon), epsilonTrans); + for (auto new_trans : epsilonTrans) { + size_t fin = new_trans.GetState(); + if (IsFinal(fin)) { + matched = true; + if (st.HasBegin() || (trans.GetAction() & ActionsCapture)) + return true; + } + } + } + } + return false; + } + + bool Matched(const SingleState& st) const + { + bool matched; + Captured(st, matched); + return matched; + } + + bool GetCapture(const State& st, SingleState& final) const + { + size_t pos = 0; + bool matched = false; + bool ans = false; + while (pos < st.GetSize() && !matched) { + ans = Captured(st.GetState(pos), matched); + ++pos; + } + if (matched) { + final = st.GetState(pos - 1); + return ans; + } else { + if (st.IsMatched()) { + final = st.GetMatched(); + return true; + } + return false; + } + } + + bool PushState(State& nlist, const SingleState& state) const + { + nlist.PushState(state); + if (Matched(state)) { + nlist.AddMatch(state); + return true; + } + return false; + } + + void UpdateNList(State& nlist, const PriorityStates& states) const + { + static constexpr std::array<RepetitionTypes, 3> m_type_by_priority{{NonGreedyRepetition, NoRepetition, GreedyRepetition}}; + for (const auto type : m_type_by_priority) { + for (const auto& state : states.Get(type)) { + if (PushState(nlist, state)) // Because we have strict priorities, after matching some state, we can be sure, that not states after will be better + return; + } + } + } + + void Initialize(State& nlist) const + { + PriorityStates states; + SingleState init(GetStart()); + TVector<bool> used(GetSize()); + NextAndGetToGroups(states, init, Translate(BeginMark), 0, used); + NextAndGetToGroups(states, 0, Translate(BeginMark), 0, used); + UpdateNList(nlist, states); + } + + Action NextTranslated(State& clist, Char letter) const + { + State nlist; + if (clist.IsMatched()) + nlist.AddMatch(clist.GetMatched()); + nlist.SetPos(clist.GetPos() + 1); + size_t strpos = nlist.GetPos(); + TVector<bool> used(GetSize()); + size_t pos = 0; + while (pos < clist.GetSize()) { + PriorityStates states; + NextAndGetToGroups(states, clist.GetState(pos), letter, strpos, used); + UpdateNList(nlist, states); + ++pos; + } + DoSwap(clist, nlist); + return 0; + } + + void TakeAction(State&, Action) const {} + + Action Next(State& st, Char letter) const + { + return NextTranslated(st, Translate(letter)); + } + +public: + SlowCapturingScanner() + : SlowScanner(true) + { + } SlowCapturingScanner(Fsm& fsm, size_t distance = 0) : SlowScanner(fsm, true, false, distance) - { - } -}; + { + } +}; namespace Features { Feature::Ptr Capture(size_t pos); diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp index 984d708dfa..b3709c591f 100644 --- a/contrib/libs/pire/pire/fsm.cpp +++ b/contrib/libs/pire/pire/fsm.cpp @@ -837,11 +837,11 @@ bool Fsm::LettersEquality::operator()(Char a, Char b) const return true; } -void Fsm::Sparse(bool needEpsilons /* = false */) +void Fsm::Sparse(bool needEpsilons /* = false */) { letters = LettersTbl(LettersEquality(m_transitions)); for (unsigned letter = 0; letter < MaxChar; ++letter) - if (letter != Epsilon || needEpsilons) + if (letter != Epsilon || needEpsilons) letters.Append(letter); m_sparsed = true; diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h index 4dad06ca06..275cdd2904 100644 --- a/contrib/libs/pire/pire/fsm.h +++ b/contrib/libs/pire/pire/fsm.h @@ -201,7 +201,7 @@ namespace Pire { /// Builds letters equivalence classes - void Sparse(bool needEpsilons = false); + void Sparse(bool needEpsilons = false); /// Unpacks all letters equivalence classs back into transitions table void Unsparse(); diff --git a/contrib/libs/pire/pire/re_parser.y b/contrib/libs/pire/pire/re_parser.y index dbad88e287..29aad9063f 100644 --- a/contrib/libs/pire/pire/re_parser.y +++ b/contrib/libs/pire/pire/re_parser.y @@ -139,7 +139,7 @@ iteration cur += (orig | Fsm()) * (repc.second - repc.first); } } - rlex.Parenthesized($$->As<Fsm>()); + rlex.Parenthesized($$->As<Fsm>()); delete $1; delete $2; } diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h index f6e1ff734d..101611e566 100644 --- a/contrib/libs/pire/pire/run.h +++ b/contrib/libs/pire/pire/run.h @@ -290,15 +290,15 @@ std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool thr { typename Scanner::State st; sc.Initialize(st); - if (throughBeginMark) - Pire::Step(sc, st, BeginMark); + if (throughBeginMark) + Pire::Step(sc, st, BeginMark); const char* pos = (sc.Final(st) ? str.data() : nullptr); Impl::DoRun(sc, st, str, Impl::LongestPrefixPred<Scanner>(pos)); - if (throughEndMark) { - Pire::Step(sc, st, EndMark); - if (sc.Final(st)) + if (throughEndMark) { + Pire::Step(sc, st, EndMark); + if (sc.Final(st)) pos = str.data() + str.size(); - } + } return pos ? str.substr(0, pos - str.data()) : std::string_view{}; } @@ -316,17 +316,17 @@ std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool th { typename Scanner::State st; sc.Initialize(st); - if (throughBeginMark) - Pire::Step(sc, st, BeginMark); + if (throughBeginMark) + Pire::Step(sc, st, BeginMark); if (sc.Final(st)) return str.substr(0, 0); const char* pos = nullptr; Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos)); - if (throughEndMark) { - Pire::Step(sc, st, EndMark); + if (throughEndMark) { + Pire::Step(sc, st, EndMark); if (sc.Final(st) && !pos) pos = str.data() + str.size(); - } + } return pos ? str.substr(0, pos - str.data()) : std::string_view{}; } @@ -347,8 +347,8 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s { typename Scanner::State state; scanner.Initialize(state); - if (throughEndMark) - Step(scanner, state, EndMark); + if (throughEndMark) + Step(scanner, state, EndMark); PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl); PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); @@ -363,11 +363,11 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s } if (scanner.Final(state)) suffix = str.substr(begin - str.data()); - if (throughBeginMark) { - Step(scanner, state, BeginMark); - if (scanner.Final(state)) + if (throughBeginMark) { + Step(scanner, state, BeginMark); + if (scanner.Final(state)) suffix = str.substr(begin - str.data()); - } + } return suffix; } @@ -386,8 +386,8 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view auto begin = str.data() + str.size(); typename Scanner::State state; scanner.Initialize(state); - if (throughEndMark) - Step(scanner, state, EndMark); + if (throughEndMark) + Step(scanner, state, EndMark); PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl); PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); @@ -396,8 +396,8 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view scanner.Next(state, (unsigned char)*begin); PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl); } - if (throughBeginMark) - Step(scanner, state, BeginMark); + if (throughBeginMark) + Step(scanner, state, BeginMark); return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{}; } diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp index 3956e3c6ed..24751dd776 100644 --- a/contrib/libs/pire/pire/scanner_io.cpp +++ b/contrib/libs/pire/pire/scanner_io.cpp @@ -97,15 +97,15 @@ void SlowScanner::Save(yostream* s) const size += sizeof(unsigned) * i.size(); } Impl::AlignSave(s, size); - if (need_actions) { - size_t pos = 0; - for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i) - if (!i->empty()) { - SavePodArray(s, &(*i)[0], i->size()); - pos += sizeof(Action) * i->size(); - } - Impl::AlignSave(s, pos); - } + if (need_actions) { + size_t pos = 0; + for (TVector< TVector< Action > >::const_iterator i = m_actionsvec.begin(), ie = m_actionsvec.end(); i != ie; ++i) + if (!i->empty()) { + SavePodArray(s, &(*i)[0], i->size()); + pos += sizeof(Action) * i->size(); + } + Impl::AlignSave(s, pos); + } } } @@ -118,13 +118,13 @@ void SlowScanner::Load(yistream* s) bool empty; LoadPodType(s, empty); Impl::AlignLoad(s, sizeof(empty)); - sc.need_actions = need_actions; + sc.need_actions = need_actions; if (empty) { sc.Alias(Null()); } else { sc.m_vec.resize(sc.m.lettersCount * sc.m.statesCount); - if (sc.need_actions) - sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount); + if (sc.need_actions) + sc.m_actionsvec.resize(sc.m.lettersCount * sc.m.statesCount); sc.m_vecptr = &sc.m_vec; sc.alloc(sc.m_letters, MaxChar); @@ -140,10 +140,10 @@ void SlowScanner::Load(yistream* s) size_t n; LoadPodType(s, n); i.resize(n - c); - if (sc.need_actions) { - act->resize(n - c); - ++act; - } + if (sc.need_actions) { + act->resize(n - c); + ++act; + } c = n; } Impl::AlignLoad(s, (m_vec.size() + 1) * sizeof(size_t)); @@ -155,16 +155,16 @@ void SlowScanner::Load(yistream* s) size += sizeof(unsigned) * i.size(); } Impl::AlignLoad(s, size); - size_t actSize = 0; - if (sc.need_actions) { + size_t actSize = 0; + if (sc.need_actions) { for (auto&& i : sc.m_actionsvec) { if (!i.empty()) { LoadPodArray(s, &(i)[0], i.size()); actSize += sizeof(Action) * i.size(); - } - } - Impl::AlignLoad(s, actSize); - } + } + } + Impl::AlignLoad(s, actSize); + } } Swap(sc); } diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h index 6adfcb8c1d..0417e25569 100644 --- a/contrib/libs/pire/pire/scanners/slow.h +++ b/contrib/libs/pire/pire/scanners/slow.h @@ -74,13 +74,13 @@ public: #endif }; - SlowScanner(bool needActions = false) { - Alias(Null()); - need_actions = needActions; - } - - size_t GetLettersCount() const {return m.lettersCount; }; + SlowScanner(bool needActions = false) { + Alias(Null()); + need_actions = needActions; + } + size_t GetLettersCount() const {return m.lettersCount; }; + size_t Size() const { return GetSize(); } size_t GetSize() const { return m.statesCount; } bool Empty() const { return m_finals == Null().m_finals; } @@ -194,8 +194,8 @@ public: Impl::MapPtr(s.m_finals, s.m.statesCount, p, size); Impl::MapPtr(s.m_jumpPos, s.m.statesCount * s.m.lettersCount + 1, p, size); Impl::MapPtr(s.m_jumps, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); - if (need_actions) - Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); + if (need_actions) + Impl::MapPtr(s.m_actions, s.m_jumpPos[s.m.statesCount * s.m.lettersCount], p, size); Swap(s); } return (const void*) p; @@ -205,7 +205,7 @@ public: { DoSwap(m_finals, s.m_finals); DoSwap(m_jumps, s.m_jumps); - DoSwap(m_actions, s.m_actions); + DoSwap(m_actions, s.m_actions); DoSwap(m_jumpPos, s.m_jumpPos); DoSwap(m.statesCount, s.m.statesCount); DoSwap(m.lettersCount, s.m.lettersCount); @@ -215,8 +215,8 @@ public: DoSwap(m_vec, s.m_vec); DoSwap(m_vecptr, s.m_vecptr); - DoSwap(need_actions, s.need_actions); - DoSwap(m_actionsvec, s.m_actionsvec); + DoSwap(need_actions, s.need_actions); + DoSwap(m_actionsvec, s.m_actionsvec); if (m_vecptr == &s.m_vec) m_vecptr = &m_vec; if (s.m_vecptr == &m_vec) @@ -226,14 +226,14 @@ public: SlowScanner(const SlowScanner& s) : m(s.m) , m_vec(s.m_vec) - , need_actions(s.need_actions) - , m_actionsvec(s.m_actionsvec) + , need_actions(s.need_actions) + , m_actionsvec(s.m_actionsvec) { if (s.m_vec.empty()) { // Empty or mmap()-ed scanner, just copy pointers m_finals = s.m_finals; m_jumps = s.m_jumps; - m_actions = s.m_actions; + m_actions = s.m_actions; m_jumpPos = s.m_jumpPos; m_letters = s.m_letters; m_vecptr = 0; @@ -243,7 +243,7 @@ public: memcpy(m_letters, s.m_letters, sizeof(*m_letters) * MaxChar); m_jumps = 0; m_jumpPos = 0; - m_actions = 0; + m_actions = 0; alloc(m_finals, m.statesCount); memcpy(m_finals, s.m_finals, sizeof(*m_finals) * m.statesCount); m_vecptr = &m_vec; @@ -251,25 +251,25 @@ public: } explicit SlowScanner(Fsm& fsm, bool needActions = false, bool removeEpsilons = true, size_t distance = 0) - : need_actions(needActions) + : need_actions(needActions) { if (distance) { fsm = CreateApproxFsm(fsm, distance); } - if (removeEpsilons) - fsm.RemoveEpsilons(); - fsm.Sparse(!removeEpsilons); + if (removeEpsilons) + fsm.RemoveEpsilons(); + fsm.Sparse(!removeEpsilons); m.statesCount = fsm.Size(); m.lettersCount = fsm.Letters().Size(); m_vec.resize(m.statesCount * m.lettersCount); - if (need_actions) - m_actionsvec.resize(m.statesCount * m.lettersCount); + if (need_actions) + m_actionsvec.resize(m.statesCount * m.lettersCount); m_vecptr = &m_vec; alloc(m_letters, MaxChar); m_jumps = 0; - m_actions = 0; + m_actions = 0; m_jumpPos = 0; alloc(m_finals, m.statesCount); @@ -297,47 +297,47 @@ public: const State& StateIndex(const State& s) const { return s; } -protected: - bool IsMmaped() const - { - return (!m_vecptr); - } - - size_t GetJump(size_t pos) const - { - return m_jumps[pos]; - } - - Action& GetAction(size_t pos) const - { - return m_actions[pos]; - } - - const TVector<Action>& GetActionsVec(size_t from) const - { - return m_actionsvec[from]; - } - - const TVector<unsigned int>& GetJumpsVec(size_t from) const - { - return m_vec[from]; - } - - size_t* GetJumpPos() const - { - return m_jumpPos; - } - - size_t GetStart() const - { - return m.start; - } - - bool IsFinal(size_t pos) const - { - return m_finals[pos]; - } - +protected: + bool IsMmaped() const + { + return (!m_vecptr); + } + + size_t GetJump(size_t pos) const + { + return m_jumps[pos]; + } + + Action& GetAction(size_t pos) const + { + return m_actions[pos]; + } + + const TVector<Action>& GetActionsVec(size_t from) const + { + return m_actionsvec[from]; + } + + const TVector<unsigned int>& GetJumpsVec(size_t from) const + { + return m_vec[from]; + } + + size_t* GetJumpPos() const + { + return m_jumpPos; + } + + size_t GetStart() const + { + return m.start; + } + + bool IsFinal(size_t pos) const + { + return m_finals[pos]; + } + private: struct Locals { @@ -348,15 +348,15 @@ private: bool* m_finals; unsigned* m_jumps; - Action* m_actions; + Action* m_actions; size_t* m_jumpPos; size_t* m_letters; TVector<void*> m_pool; TVector< TVector<unsigned> > m_vec, *m_vecptr; - bool need_actions; - TVector<TVector<Action>> m_actionsvec; + bool need_actions; + TVector<TVector<Action>> m_actionsvec; static const SlowScanner& Null(); template<class T> void alloc(T*& p, size_t size) @@ -370,18 +370,18 @@ private: { memcpy(&m, &s.m, sizeof(m)); m_vec.clear(); - need_actions = s.need_actions; - m_actionsvec.clear(); + need_actions = s.need_actions; + m_actionsvec.clear(); m_finals = s.m_finals; m_jumps = s.m_jumps; - m_actions = s.m_actions; + m_actions = s.m_actions; m_jumpPos = s.m_jumpPos; m_letters = s.m_letters; m_vecptr = s.m_vecptr; m_pool.clear(); } - void SetJump(size_t oldState, Char c, size_t newState, unsigned long action) + void SetJump(size_t oldState, Char c, size_t newState, unsigned long action) { Y_ASSERT(!m_vec.empty()); Y_ASSERT(oldState < m.statesCount); @@ -389,8 +389,8 @@ private: size_t idx = oldState * m.lettersCount + m_letters[c]; m_vec[idx].push_back(newState); - if (need_actions) - m_actionsvec[idx].push_back(action); + if (need_actions) + m_actionsvec[idx].push_back(action); } unsigned long RemapAction(unsigned long action) { return action; } diff --git a/library/cpp/regex/pire/regexp.h b/library/cpp/regex/pire/regexp.h index 94bba4064b..92ce16e14f 100644 --- a/library/cpp/regex/pire/regexp.h +++ b/library/cpp/regex/pire/regexp.h @@ -48,7 +48,7 @@ namespace NRegExp { }; static inline NPire::TFsm Parse(const TStringBuf& regexp, - const TOptions& opts, const bool needDetermine = true) { + const TOptions& opts, const bool needDetermine = true) { NPire::TLexer lexer; if (opts.Charset == CODES_UNKNOWN) { lexer.Assign(regexp.data(), regexp.data() + regexp.size()); @@ -98,9 +98,9 @@ namespace NRegExp { ret.Surround(); } - if (needDetermine) { - ret.Determine(); - } + if (needDetermine) { + ret.Determine(); + } return ret; } @@ -113,8 +113,8 @@ namespace NRegExp { public: inline explicit TFsmParser(const TStringBuf& regexp, - const TOptions& opts = TOptions(), bool needDetermine = true) - : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) + const TOptions& opts = TOptions(), bool needDetermine = true) + : Scanner(Parse(regexp, opts, needDetermine).template Compile<TScanner>()) { } @@ -177,19 +177,19 @@ namespace NRegExp { } }; - struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { + struct TSlowCapturingFsm : TFsmParser<NPire::TSlowCapturingScanner> { inline explicit TSlowCapturingFsm(const TStringBuf& regexp, - TOptions opts = TOptions()) - : TFsmParser<TScanner>(regexp, - opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) { - } - - inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm) - : TFsmParser<TScanner>(fsm) - { - } - }; - + TOptions opts = TOptions()) + : TFsmParser<TScanner>(regexp, + opts.SetSurround(true).CapturePos ? opts : opts.SetCapture(1), false) { + } + + inline TSlowCapturingFsm(const TFsmParser<TScanner>& fsm) + : TFsmParser<TScanner>(fsm) + { + } + }; + template <class TFsm> class TMatcherBase { public: @@ -285,53 +285,53 @@ namespace NRegExp { private: TStringBuf Data; }; - - class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ - public: - typedef typename TSlowCapturingFsm::TScanner::State TState; - inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm) - : TMatcherBase<TSlowCapturingFsm>(fsm) - , HasCaptured(false) - { - } - - inline bool Captured() const noexcept { - return HasCaptured; - } - - inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { - TStringBuf textData(data, len); - Data = textData; + + class TSlowSearcher : TMatcherBase<TSlowCapturingFsm>{ + public: + typedef typename TSlowCapturingFsm::TScanner::State TState; + inline explicit TSlowSearcher(const TSlowCapturingFsm& fsm) + : TMatcherBase<TSlowCapturingFsm>(fsm) + , HasCaptured(false) + { + } + + inline bool Captured() const noexcept { + return HasCaptured; + } + + inline TSlowSearcher& Search(const char* data, size_t len, bool addBegin = false, bool addEnd = false) noexcept { + TStringBuf textData(data, len); + Data = textData; Run(Data.begin(), Data.size(), addBegin, addEnd); - return GetAns(); - } - + return GetAns(); + } + inline TSlowSearcher& Search(const TStringBuf& s) noexcept { return Search(s.data(), s.size()); - } - - inline TStringBuf GetCaptured() const noexcept { - return Ans; - } - - private: - TStringBuf Data; - TStringBuf Ans; - bool HasCaptured; - - inline TSlowSearcher& GetAns() { - auto state = GetState(); - Pire::SlowCapturingScanner::SingleState final; - if (!GetScanner().GetCapture(state, final)) { - HasCaptured = false; - } else { - if (!final.HasEnd()) { - final.SetEnd(Data.size()); - } - Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin()); - HasCaptured = true; - } - return *this; - } - }; + } + + inline TStringBuf GetCaptured() const noexcept { + return Ans; + } + + private: + TStringBuf Data; + TStringBuf Ans; + bool HasCaptured; + + inline TSlowSearcher& GetAns() { + auto state = GetState(); + Pire::SlowCapturingScanner::SingleState final; + if (!GetScanner().GetCapture(state, final)) { + HasCaptured = false; + } else { + if (!final.HasEnd()) { + final.SetEnd(Data.size()); + } + Ans = TStringBuf(Data, final.GetBegin(), final.GetEnd() - final.GetBegin()); + HasCaptured = true; + } + return *this; + } + }; } diff --git a/library/cpp/regex/pire/ut/regexp_ut.cpp b/library/cpp/regex/pire/ut/regexp_ut.cpp index e7206de9ad..19694cfe59 100644 --- a/library/cpp/regex/pire/ut/regexp_ut.cpp +++ b/library/cpp/regex/pire/ut/regexp_ut.cpp @@ -216,95 +216,95 @@ Y_UNIT_TEST_SUITE(TRegExp) { UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("xx")); } - Y_UNIT_TEST(SlowCapture) { - TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", - TFsm::TOptions().SetCapture(2)); - TSlowSearcher searcher(fsm); - searcher.Search("http://vkontakte.ru/id100500"); - UNIT_ASSERT(searcher.Captured()); + Y_UNIT_TEST(SlowCapture) { + TSlowCapturingFsm fsm("^http://vk(ontakte[.]ru|[.]com)/id(\\d+)([^0-9]|$)", + TFsm::TOptions().SetCapture(2)); + TSlowSearcher searcher(fsm); + searcher.Search("http://vkontakte.ru/id100500"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("100500")); - } - - Y_UNIT_TEST(SlowCaptureGreedy) { - TSlowCapturingFsm fsm(".*(pref.*suff)"); - TSlowSearcher searcher(fsm); - searcher.Search("pref ala bla pref cla suff dla"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCaptureGreedy) { + TSlowCapturingFsm fsm(".*(pref.*suff)"); + TSlowSearcher searcher(fsm); + searcher.Search("pref ala bla pref cla suff dla"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref cla suff")); - } - - Y_UNIT_TEST(SlowCaptureNonGreedy) { - TSlowCapturingFsm fsm(".*?(pref.*suff)"); - TSlowSearcher searcher(fsm); - searcher.Search("pref ala bla pref cla suff dla"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCaptureNonGreedy) { + TSlowCapturingFsm fsm(".*?(pref.*suff)"); + TSlowSearcher searcher(fsm); + searcher.Search("pref ala bla pref cla suff dla"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("pref ala bla pref cla suff")); - } - - Y_UNIT_TEST(SlowCapture2) { - TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", - TFsm::TOptions().SetCharset(CODES_UTF8)); - - TSlowSearcher searcher(fsm); - searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture2) { + TSlowCapturingFsm fsm("Здравствуйте, ((\\s|\\w|[()]|-)+)!", + TFsm::TOptions().SetCharset(CODES_UTF8)); + + TSlowSearcher searcher(fsm); + searcher.Search(" Здравствуйте, Уважаемый (-ая)! "); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("Уважаемый (-ая)")); - } - - Y_UNIT_TEST(SlowCapture3) { - TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); - TSlowSearcher searcher(fsm); - searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture3) { + TSlowCapturingFsm fsm("here we have user_id=([a-z0-9]+);"); + TSlowSearcher searcher(fsm); + searcher.Search("in db and here we have user_id=0x0d0a; same as CRLF"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("0x0d0a")); - } - - Y_UNIT_TEST(SlowCapture4) { - TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\""); - TSlowSearcher searcher(fsm); - searcher.Search("\"/away.php?to=http:some.addr\"&id=1"); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(SlowCapture4) { + TSlowCapturingFsm fsm("away\\.php\\?to=http:([^\"]+)\""); + TSlowSearcher searcher(fsm); + searcher.Search("\"/away.php?to=http:some.addr\"&id=1"); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("some.addr")); - } - - Y_UNIT_TEST(CapturedEmptySlow) { - TSlowCapturingFsm fsm("Comments=(.*)$"); - TSlowSearcher searcher(fsm); - searcher.Search("And Comments="); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CapturedEmptySlow) { + TSlowCapturingFsm fsm("Comments=(.*)$"); + TSlowSearcher searcher(fsm); + searcher.Search("And Comments="); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); - } - - Y_UNIT_TEST(CaptureInOrFirst) { - TSlowCapturingFsm fsm("(A)|A"); - TSlowSearcher searcher(fsm); - searcher.Search("A"); - UNIT_ASSERT(searcher.Captured()); - } - - Y_UNIT_TEST(CaptureInOrSecond) { - TSlowCapturingFsm fsm("A|(A)"); - TSlowSearcher searcher(fsm); - searcher.Search("A"); - UNIT_ASSERT(!searcher.Captured()); - } - - Y_UNIT_TEST(CaptureOutside) { - TSlowCapturingFsm fsm("((ID=([0-9]+))?)"); - TSlowSearcher searcher(fsm); - searcher.Search("ID="); - UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CaptureInOrFirst) { + TSlowCapturingFsm fsm("(A)|A"); + TSlowSearcher searcher(fsm); + searcher.Search("A"); + UNIT_ASSERT(searcher.Captured()); + } + + Y_UNIT_TEST(CaptureInOrSecond) { + TSlowCapturingFsm fsm("A|(A)"); + TSlowSearcher searcher(fsm); + searcher.Search("A"); + UNIT_ASSERT(!searcher.Captured()); + } + + Y_UNIT_TEST(CaptureOutside) { + TSlowCapturingFsm fsm("((ID=([0-9]+))?)"); + TSlowSearcher searcher(fsm); + searcher.Search("ID="); + UNIT_ASSERT(searcher.Captured()); UNIT_ASSERT_VALUES_EQUAL(searcher.GetCaptured(), TStringBuf("")); - } - - Y_UNIT_TEST(CaptureInside) { - TSlowCapturingFsm fsm("((ID=([0-9]+))?)", - TFsm::TOptions().SetCapture(2)); - TSlowSearcher searcher(fsm); - searcher.Search("ID="); - UNIT_ASSERT(!searcher.Captured()); - } - + } + + Y_UNIT_TEST(CaptureInside) { + TSlowCapturingFsm fsm("((ID=([0-9]+))?)", + TFsm::TOptions().SetCapture(2)); + TSlowSearcher searcher(fsm); + searcher.Search("ID="); + UNIT_ASSERT(!searcher.Captured()); + } + Y_UNIT_TEST(Pcre2PireTest) { UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)"), "(fake)"); UNIT_ASSERT_VALUES_EQUAL(Pcre2Pire("(?:fake)??"), "(fake)?"); |