diff options
author | kv75 <kv75@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:07 +0300 |
commit | 67b8a8d9e3b2255cfba49c29979126c03098e00e (patch) | |
tree | 15900cb76401efa8493b960e5c4c0216e0609730 /contrib/libs/pire | |
parent | ed2bfbca3e30e641448ad350b4305c69e12aff88 (diff) | |
download | ydb-67b8a8d9e3b2255cfba49c29979126c03098e00e.tar.gz |
Restoring authorship annotation for <kv75@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/pire')
-rw-r--r-- | contrib/libs/pire/pire/Makefile.am | 4 | ||||
-rw-r--r-- | contrib/libs/pire/pire/determine.h | 6 | ||||
-rw-r--r-- | contrib/libs/pire/pire/extra/count.cpp | 1492 | ||||
-rw-r--r-- | contrib/libs/pire/pire/extra/count.h | 132 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.cpp | 206 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.h | 4 | ||||
-rw-r--r-- | contrib/libs/pire/pire/minimize.h | 306 |
7 files changed, 1075 insertions, 1075 deletions
diff --git a/contrib/libs/pire/pire/Makefile.am b/contrib/libs/pire/pire/Makefile.am index 09ef211704..8c72b9a652 100644 --- a/contrib/libs/pire/pire/Makefile.am +++ b/contrib/libs/pire/pire/Makefile.am @@ -22,7 +22,7 @@ libpire_la_SOURCES = \ fwd.h \ glue.cpp \ glue.h \ - minimize.h \ + minimize.h \ half_final_fsm.cpp \ half_final_fsm.h \ partition.h \ @@ -73,7 +73,7 @@ pire_hdr_HEADERS = \ fsm.h \ fwd.h \ glue.h \ - minimize.h \ + minimize.h \ half_final_fsm.h \ partition.h \ pire.h \ diff --git a/contrib/libs/pire/pire/determine.h b/contrib/libs/pire/pire/determine.h index fb48fdd0b3..2083113b4b 100644 --- a/contrib/libs/pire/pire/determine.h +++ b/contrib/libs/pire/pire/determine.h @@ -136,9 +136,9 @@ namespace Pire { } return task.Success(); } - - // Faster transition table representation for determined FSM - typedef TVector<size_t> DeterminedTransitions; + + // Faster transition table representation for determined FSM + typedef TVector<size_t> DeterminedTransitions; } } diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp index 468ff61d92..dbc8f3f03b 100644 --- a/contrib/libs/pire/pire/extra/count.cpp +++ b/contrib/libs/pire/pire/extra/count.cpp @@ -21,725 +21,725 @@ */ -#include "count.h" - +#include "count.h" + #include <contrib/libs/pire/pire/fsm.h> #include <contrib/libs/pire/pire/determine.h> #include <contrib/libs/pire/pire/glue.h> -#include <contrib/libs/pire/pire/minimize.h> +#include <contrib/libs/pire/pire/minimize.h> #include <contrib/libs/pire/pire/stub/lexical_cast.h> -#include <contrib/libs/pire/pire/stub/stl.h> +#include <contrib/libs/pire/pire/stub/stl.h> -#include <tuple> +#include <tuple> namespace Pire { -namespace Impl { - -typedef LoadedScanner::Action Action; -typedef TMap<Char, Action> TransitionTagRow; -typedef TVector<TransitionTagRow> TransitionTagTable; - -class CountingFsmTask; - -class CountingFsm { -public: - typedef Fsm::LettersTbl LettersTbl; - - enum { - NotMatched = 1 << 0, - Matched = 1 << 1, - Separated = 1 << 2, - }; - - explicit CountingFsm(Fsm re, Fsm sep) - : mFsm(std::move(re)) - { - mFsm.Canonize(); - const auto reMatchedStates = mFsm.Finals(); - - sep.Canonize(); - for (size_t state = 0; state < sep.Size(); ++state) { - sep.SetTag(state, Separated); - } - mFsm += sep; - - mReInitial = mFsm.Initial(); - const auto allowEmptySeparator = sep.IsFinal(sep.Initial()); - for (auto reMatchedState : reMatchedStates) { - mFsm.SetTag(reMatchedState, Matched); - if (allowEmptySeparator) { - mFsm.SetFinal(reMatchedState, true); - } - } - - mFsm.PrependAnything(); - mFsm.RemoveEpsilons(); - } - - const LettersTbl& Letters() const { - return mFsm.Letters(); - } - - const Fsm& Determined() const { - return mDetermined; - } - - Action Output(size_t from, Char letter) const { - const auto& row = mActions[from]; - const auto it = row.find(letter); - if (it != row.end()) { - return it->second; - } else { - return 0; - } - } - - bool Simple() const { - return mSimple; - } - - bool Determine(); - void Minimize(); - -private: - void SwapTaskOutputs(CountingFsmTask& task); - -private: - Fsm mFsm; - size_t mReInitial; - Fsm mDetermined; - TransitionTagTable mActions; - bool mSimple; -}; - -class CountingFsmTask { -public: - typedef Fsm::LettersTbl LettersTbl; - - virtual ~CountingFsmTask() {} - - void Connect(size_t from, size_t to, Char letter) { - mNewFsm.Connect(from, to, letter); - } - - typedef bool Result; - - static Result Success() { - return true; - } - - static Result Failure() { - return false; - } - - Fsm& Output() { - return mNewFsm; - } - - TransitionTagTable& Actions() { - return mNewActions; - } - -protected: - void ResizeOutput(size_t size) { - mNewFsm.Resize(size); - mNewActions.resize(size); - } - -private: - Fsm mNewFsm; - TransitionTagTable mNewActions; -}; - -class StateLessForMinimize { -public: - StateLessForMinimize(const CountingFsm& fsm) : mFsm(fsm) {} - - bool operator()(size_t first, size_t second) const { - for (auto&& lettersEl : mFsm.Letters()) { - const auto letter = lettersEl.first; - if (mFsm.Output(first, letter) != mFsm.Output(second, letter)) { - return mFsm.Output(first, letter) < mFsm.Output(second, letter); - } - } - return false; - } - -private: - const CountingFsm& mFsm; -}; - - -class CountingFsmMinimizeTask : public CountingFsmTask { -public: - explicit CountingFsmMinimizeTask(const CountingFsm& fsm) - : mFsm(fsm) - , reversedTransitions(fsm.Determined().Size()) - , StateClass(fsm.Determined().Size()) - , Classes(0) - { - TMap<size_t, size_t, StateLessForMinimize> stateClassMap = TMap<size_t, size_t, StateLessForMinimize>(StateLessForMinimize(mFsm)); - for (size_t state = 0; state < mFsm.Determined().Size(); ++state) { - if (stateClassMap.find(state) == stateClassMap.end()) { - stateClassMap[state] = Classes++; - } - StateClass[state] = stateClassMap[state]; - reversedTransitions[state].resize(mFsm.Letters().Size()); - } - - for (size_t state = 0; state < mFsm.Determined().Size(); ++state) { - TSet<ypair<Char, size_t>> usedTransitions; - for (const auto& letter : mFsm.Letters()) { - const auto destination = Next(state, letter.first); - const auto letterId = letter.second.first; - if (usedTransitions.find(ymake_pair(letterId, destination)) == usedTransitions.end()) { - usedTransitions.insert(ymake_pair(letterId, destination)); - reversedTransitions[destination][letterId].push_back(state); - } - } - } - } - - TVector<size_t>& GetStateClass() { return StateClass; } - - size_t& GetClassesNumber() { return Classes; } - - size_t LettersCount() const { - return mFsm.Letters().Size(); - } - - bool IsDetermined() const { - return mFsm.Determined().IsDetermined(); - } - - size_t Size() const { - return mFsm.Determined().Size(); - } - - const TVector<size_t>& Previous(size_t state, size_t letter) const { - return reversedTransitions[state][letter]; - } - - void AcceptStates() { - ResizeOutput(Classes); - auto& newFsm = Output(); - auto& newActions = Actions(); - newFsm.SetFinal(0, false); - - // Unite equality classes into new states - for (size_t from = 0; from != Size(); ++from) { - const auto fromMinimized = StateClass[from]; - for (auto&& letter : mFsm.Letters()) { +namespace Impl { + +typedef LoadedScanner::Action Action; +typedef TMap<Char, Action> TransitionTagRow; +typedef TVector<TransitionTagRow> TransitionTagTable; + +class CountingFsmTask; + +class CountingFsm { +public: + typedef Fsm::LettersTbl LettersTbl; + + enum { + NotMatched = 1 << 0, + Matched = 1 << 1, + Separated = 1 << 2, + }; + + explicit CountingFsm(Fsm re, Fsm sep) + : mFsm(std::move(re)) + { + mFsm.Canonize(); + const auto reMatchedStates = mFsm.Finals(); + + sep.Canonize(); + for (size_t state = 0; state < sep.Size(); ++state) { + sep.SetTag(state, Separated); + } + mFsm += sep; + + mReInitial = mFsm.Initial(); + const auto allowEmptySeparator = sep.IsFinal(sep.Initial()); + for (auto reMatchedState : reMatchedStates) { + mFsm.SetTag(reMatchedState, Matched); + if (allowEmptySeparator) { + mFsm.SetFinal(reMatchedState, true); + } + } + + mFsm.PrependAnything(); + mFsm.RemoveEpsilons(); + } + + const LettersTbl& Letters() const { + return mFsm.Letters(); + } + + const Fsm& Determined() const { + return mDetermined; + } + + Action Output(size_t from, Char letter) const { + const auto& row = mActions[from]; + const auto it = row.find(letter); + if (it != row.end()) { + return it->second; + } else { + return 0; + } + } + + bool Simple() const { + return mSimple; + } + + bool Determine(); + void Minimize(); + +private: + void SwapTaskOutputs(CountingFsmTask& task); + +private: + Fsm mFsm; + size_t mReInitial; + Fsm mDetermined; + TransitionTagTable mActions; + bool mSimple; +}; + +class CountingFsmTask { +public: + typedef Fsm::LettersTbl LettersTbl; + + virtual ~CountingFsmTask() {} + + void Connect(size_t from, size_t to, Char letter) { + mNewFsm.Connect(from, to, letter); + } + + typedef bool Result; + + static Result Success() { + return true; + } + + static Result Failure() { + return false; + } + + Fsm& Output() { + return mNewFsm; + } + + TransitionTagTable& Actions() { + return mNewActions; + } + +protected: + void ResizeOutput(size_t size) { + mNewFsm.Resize(size); + mNewActions.resize(size); + } + +private: + Fsm mNewFsm; + TransitionTagTable mNewActions; +}; + +class StateLessForMinimize { +public: + StateLessForMinimize(const CountingFsm& fsm) : mFsm(fsm) {} + + bool operator()(size_t first, size_t second) const { + for (auto&& lettersEl : mFsm.Letters()) { + const auto letter = lettersEl.first; + if (mFsm.Output(first, letter) != mFsm.Output(second, letter)) { + return mFsm.Output(first, letter) < mFsm.Output(second, letter); + } + } + return false; + } + +private: + const CountingFsm& mFsm; +}; + + +class CountingFsmMinimizeTask : public CountingFsmTask { +public: + explicit CountingFsmMinimizeTask(const CountingFsm& fsm) + : mFsm(fsm) + , reversedTransitions(fsm.Determined().Size()) + , StateClass(fsm.Determined().Size()) + , Classes(0) + { + TMap<size_t, size_t, StateLessForMinimize> stateClassMap = TMap<size_t, size_t, StateLessForMinimize>(StateLessForMinimize(mFsm)); + for (size_t state = 0; state < mFsm.Determined().Size(); ++state) { + if (stateClassMap.find(state) == stateClassMap.end()) { + stateClassMap[state] = Classes++; + } + StateClass[state] = stateClassMap[state]; + reversedTransitions[state].resize(mFsm.Letters().Size()); + } + + for (size_t state = 0; state < mFsm.Determined().Size(); ++state) { + TSet<ypair<Char, size_t>> usedTransitions; + for (const auto& letter : mFsm.Letters()) { + const auto destination = Next(state, letter.first); + const auto letterId = letter.second.first; + if (usedTransitions.find(ymake_pair(letterId, destination)) == usedTransitions.end()) { + usedTransitions.insert(ymake_pair(letterId, destination)); + reversedTransitions[destination][letterId].push_back(state); + } + } + } + } + + TVector<size_t>& GetStateClass() { return StateClass; } + + size_t& GetClassesNumber() { return Classes; } + + size_t LettersCount() const { + return mFsm.Letters().Size(); + } + + bool IsDetermined() const { + return mFsm.Determined().IsDetermined(); + } + + size_t Size() const { + return mFsm.Determined().Size(); + } + + const TVector<size_t>& Previous(size_t state, size_t letter) const { + return reversedTransitions[state][letter]; + } + + void AcceptStates() { + ResizeOutput(Classes); + auto& newFsm = Output(); + auto& newActions = Actions(); + newFsm.SetFinal(0, false); + + // Unite equality classes into new states + for (size_t from = 0; from != Size(); ++from) { + const auto fromMinimized = StateClass[from]; + for (auto&& letter : mFsm.Letters()) { const auto representative = letter.first; - const auto next = Next(from, representative); - const auto nextMinimized = StateClass[next]; - Connect(fromMinimized, nextMinimized, representative); - const auto outputs = mFsm.Output(from, representative); - if (outputs) { - newActions[fromMinimized][representative] = outputs; - } - } - if (mFsm.Determined().IsFinal(from)) { - newFsm.SetFinal(fromMinimized, true); - } - } - newFsm.SetInitial(StateClass[mFsm.Determined().Initial()]); - newFsm.SetIsDetermined(true); - } - -private: - const CountingFsm& mFsm; - TVector<TVector<TVector<size_t>>> reversedTransitions; - TVector<size_t> StateClass; - size_t Classes; - - size_t Next(size_t state, Char letter) const { - const auto& tos = mFsm.Determined().Destinations(state, letter); - Y_ASSERT(tos.size() == 1); - return *tos.begin(); - } -}; - -typedef size_t RawState; -typedef ypair<RawState, unsigned long> TaggedState; -typedef TSet<TaggedState> StateGroup; - -struct DeterminedState { -public: - StateGroup matched; - StateGroup unmatched; - StateGroup separated; - StateGroup lagging; -}; - -bool operator < (const DeterminedState& left, const DeterminedState& right) { - auto asTuple = [](const DeterminedState& state) { - return std::tie(state.matched, state.unmatched, state.separated, state.lagging); - }; - - return asTuple(left) < asTuple(right); -} - -bool InvalidCharRange(const TVector<Char>& range) { - for (const auto letter : range) { - if (letter < MaxCharUnaligned && letter != 256) { - return false; - } - } - return true; -} - -class BasicCountingFsmDetermineTask : public CountingFsmTask { -public: - using CountingFsmTask::LettersTbl; - typedef DeterminedState State; - typedef TMap<State, size_t> InvStates; - - explicit BasicCountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) - : mFsm(fsm) - , mReInitial{reInitial} - { - mDeadStates = fsm.DeadStates(); + const auto next = Next(from, representative); + const auto nextMinimized = StateClass[next]; + Connect(fromMinimized, nextMinimized, representative); + const auto outputs = mFsm.Output(from, representative); + if (outputs) { + newActions[fromMinimized][representative] = outputs; + } + } + if (mFsm.Determined().IsFinal(from)) { + newFsm.SetFinal(fromMinimized, true); + } + } + newFsm.SetInitial(StateClass[mFsm.Determined().Initial()]); + newFsm.SetIsDetermined(true); + } + +private: + const CountingFsm& mFsm; + TVector<TVector<TVector<size_t>>> reversedTransitions; + TVector<size_t> StateClass; + size_t Classes; + + size_t Next(size_t state, Char letter) const { + const auto& tos = mFsm.Determined().Destinations(state, letter); + Y_ASSERT(tos.size() == 1); + return *tos.begin(); + } +}; + +typedef size_t RawState; +typedef ypair<RawState, unsigned long> TaggedState; +typedef TSet<TaggedState> StateGroup; + +struct DeterminedState { +public: + StateGroup matched; + StateGroup unmatched; + StateGroup separated; + StateGroup lagging; +}; + +bool operator < (const DeterminedState& left, const DeterminedState& right) { + auto asTuple = [](const DeterminedState& state) { + return std::tie(state.matched, state.unmatched, state.separated, state.lagging); + }; + + return asTuple(left) < asTuple(right); +} + +bool InvalidCharRange(const TVector<Char>& range) { + for (const auto letter : range) { + if (letter < MaxCharUnaligned && letter != 256) { + return false; + } + } + return true; +} + +class BasicCountingFsmDetermineTask : public CountingFsmTask { +public: + using CountingFsmTask::LettersTbl; + typedef DeterminedState State; + typedef TMap<State, size_t> InvStates; + + explicit BasicCountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) + : mFsm(fsm) + , mReInitial{reInitial} + { + mDeadStates = fsm.DeadStates(); for (auto&& letter : fsm.Letters()) { if (InvalidCharRange(letter.second.second)) { mInvalidLetters.insert(letter.first); - } - } - } - - const LettersTbl& Letters() const { - return mFsm.Letters(); - } - - State Initial() const { - return State{StateGroup{}, InitialGroup(), StateGroup{}, StateGroup{}}; - } - - bool IsRequired(const State& state) const { - Y_UNUSED(state); - return true; - } - - State Next(const State& state, Char letter) const { - if (mInvalidLetters.count(letter) != 0) { - AddAction(state, letter, CountingFsm::NotMatched); - return Initial(); - } - - auto next = PrepareNextState(state, letter); - AddAction(state, letter, CalculateTransitionTag(state, next)); - PostProcessNextState(next); - NormalizeState(next); - - return next; - } - - void AcceptStates(const TVector<State>& states) - { - ResizeOutput(states.size()); - auto& newFsm = Output(); - auto& newActions = Actions(); - newFsm.SetInitial(0); - newFsm.SetIsDetermined(true); - - for (size_t ns = 0; ns < states.size(); ++ns) { - const auto& state = states[ns]; - newFsm.SetFinal(ns, HasFinals(state.unmatched)); - - auto outputIt = mActionByState.find(state); - if (outputIt != mActionByState.end()) { - newActions[ns].swap(outputIt->second); - } - } - } - -protected: - void SplitDestinations(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const StateGroup& source, Char letter) const { - for (const auto& state : source) { - MakeTaggedStates(matched, unmatched, separated, mFsm.Destinations(state.first, letter), state.second); - if (mFsm.IsFinal(state.first)) { - // Implicit epsilon transitions from final states to reInitial after matching separator - MakeTaggedStates(separated, separated, separated, mFsm.Destinations(mReInitial, letter), CountingFsm::Separated); - } - } - } - - Action CalculateTransitionTagImpl(const State& dest) const { - Action result = 0; - if (!dest.matched.empty()) { - result = AdvancedCountingScanner::IncrementAction; - } else if (dest.unmatched.empty()) { - if (!dest.separated.empty()) { - for (const auto& state : dest.separated) { - if (state.second == CountingFsm::Matched) { - result = AdvancedCountingScanner::IncrementAction; - } - } - } else { - result = AdvancedCountingScanner::ResetAction; - for (const auto& state : dest.lagging) { - if (state.second != CountingFsm::NotMatched) { - result |= AdvancedCountingScanner::IncrementAction; - } - } - } - } - return result; - } - - unsigned long TagsOfGroup(const StateGroup& group) const { - unsigned long result = 0; - for (const auto& state : group) { - result |= state.second; - } - return result; - } - - void SplitGroupByTag(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const StateGroup& source, bool useFsmTag) const { - for (const auto& state : source) { - auto tag = useFsmTag ? mFsm.Tag(state.first) : state.second; - if (tag == CountingFsm::Matched) { - matched.insert(state); - } else if (tag == CountingFsm::Separated) { - separated.insert(state); - } else { - unmatched.insert(state); - } - } - } - - void UpdateLaggingStates(State& state, bool moveToLagging) const { - if (!state.matched.empty()) { - if (moveToLagging) { - state.lagging.insert(state.unmatched.cbegin(), state.unmatched.cend()); - state.lagging.insert(state.separated.cbegin(), state.separated.cend()); - } - state.unmatched.clear(); - state.separated.clear(); - } - if (state.unmatched.empty() && !state.separated.empty()) { - const auto unmatchedTags = TagsOfGroup(state.separated); - if ((unmatchedTags & CountingFsm::Matched) && (unmatchedTags != CountingFsm::Matched)) { - StateGroup separatedMatched; - for (const auto& separatedState : state.separated) { - if (separatedState.second == CountingFsm::Matched) { - separatedMatched.insert(separatedState); - } else if (moveToLagging) { - state.lagging.insert(separatedState); - } - } - state.separated.swap(separatedMatched); - } - } - } - - void RemoveDuplicateLaggingStates(State& state) const { - const auto statesToRemove = GetRawStates({state.matched, state.unmatched, state.separated}, 0); - const auto unmatchedStatesToRemove = GetRawStates({state.lagging}, CountingFsm::NotMatched); - - StateGroup newLagging; - for (const auto& taggedState : state.lagging) { - if (statesToRemove.count(taggedState.first) == 0) { - if (taggedState.second != CountingFsm::NotMatched || unmatchedStatesToRemove.count(taggedState.first) == 0) { - newLagging.insert(taggedState); - } - } - } - state.lagging.swap(newLagging); - } - - void RemoveDuplicateSeparatedStates(State& state) const { - if (state.separated.empty()) { - return; - } - const auto statesToRemove = GetRawStates({state.matched, state.unmatched}, 0); - RemoveRawStates(state.separated, statesToRemove); - } - - void NormalizeState(State& state) const { - if (!state.matched.empty()) { - Y_ASSERT(state.unmatched.empty()); - state.unmatched.swap(state.matched); - } - - if (state.unmatched.empty() && !state.separated.empty()) { - state.unmatched.swap(state.separated); - } - - if (state.unmatched.empty() && !state.lagging.empty()) { - State groups; - SplitGroupByTag(groups.matched, groups.unmatched, groups.separated, state.lagging, false); - if (!groups.matched.empty()) { - state.unmatched.swap(groups.matched); - state.separated.swap(groups.separated); - state.lagging.swap(groups.unmatched); - } else if (!groups.separated.empty()) { - state.unmatched.swap(groups.separated); - state.lagging.swap(groups.unmatched); - } else { - state.unmatched.swap(groups.unmatched); - state.lagging.swap(groups.matched); // just clear - } - } - } - -private: - virtual State PrepareNextState(const State& state, Char letter) const = 0; - - virtual void PostProcessNextState(State& next) const = 0; - - virtual Action CalculateTransitionTag(const State& source, const State& dest) const { - Y_UNUSED(source); - return CalculateTransitionTagImpl(dest); - } - - virtual StateGroup InitialGroup() const { - return StateGroup{TaggedState{mFsm.Initial(), CountingFsm::NotMatched}}; - } - - void AddAction(State from, Char letter, unsigned long value) const { - if (!value) { - return; - } - TransitionTagRow& row = mActionByState[from]; - row[letter] = value; - } - - void MakeTaggedStates(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const Fsm::StatesSet& destinations, unsigned long sourceTag) const { - for (const auto destState : destinations) { - if (mDeadStates.count(destState) == 0) { - const auto destTag = mFsm.Tag(destState); - if (sourceTag != CountingFsm::Matched && destTag == CountingFsm::Matched) { - matched.insert(ymake_pair(destState, destTag)); - } else if (sourceTag == CountingFsm::Separated || destTag == CountingFsm::Separated) { - separated.insert(ymake_pair(destState, CountingFsm::Separated)); - } else { - unmatched.insert(ymake_pair(destState, sourceTag)); - } - } - } - } - - bool HasFinals(const StateGroup& states) const { - for (const auto& state : states) { - if (mFsm.IsFinal(state.first)) { - return true; - } - } - return false; - } - - Fsm::StatesSet GetRawStates(const TVector<std::reference_wrapper<const StateGroup>> groups, unsigned long excludedTags) const { - Fsm::StatesSet result; - for (const auto& group : groups) { - for (const auto& taggedState : group.get()) { - if (!(taggedState.second & excludedTags)) { - result.insert(taggedState.first); - } - } - } - return result; - } - - void RemoveRawStates(StateGroup& group, const Fsm::StatesSet& states) const { - StateGroup removing; - for (const auto& taggedState : group) { - if (states.count(taggedState.first) != 0) { - removing.insert(taggedState); - } - } - for (const auto& taggedState : removing) { - group.erase(taggedState); - } - } - -private: - const Fsm& mFsm; - RawState mReInitial; - Fsm::StatesSet mDeadStates; - TSet<Char> mInvalidLetters; - - mutable TMap<State, TransitionTagRow> mActionByState; -}; - -class CountingFsmDetermineTask : public BasicCountingFsmDetermineTask { -public: - using BasicCountingFsmDetermineTask::State; - using BasicCountingFsmDetermineTask::LettersTbl; - using BasicCountingFsmDetermineTask::InvStates; - - explicit CountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) - : BasicCountingFsmDetermineTask{fsm, reInitial} - {} - -private: - State PrepareNextState(const State& state, Char letter) const override { - State next; - SplitDestinations(next.matched, next.unmatched, next.separated, state.unmatched, letter); - SplitDestinations(next.separated, next.separated, next.separated, state.separated, letter); - SplitDestinations(next.lagging, next.lagging, next.lagging, state.lagging, letter); - return next; - } - - void PostProcessNextState(State& next) const override { - UpdateLaggingStates(next, true); - RemoveDuplicateLaggingStates(next); - RemoveDuplicateSeparatedStates(next); - } -}; - -class SimpleCountingFsmDetermineTask : public BasicCountingFsmDetermineTask { -public: - using BasicCountingFsmDetermineTask::State; - using BasicCountingFsmDetermineTask::LettersTbl; - using BasicCountingFsmDetermineTask::InvStates; - - static constexpr unsigned long MixedTags = CountingFsm::Separated | CountingFsm::Matched; - - SimpleCountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) - : BasicCountingFsmDetermineTask{fsm, reInitial} - , mStartState{reInitial, CountingFsm::NotMatched} - {} - -private: - State PrepareNextState(const State& state, Char letter) const override { - State next; - auto from = state; - const auto fromIsEmpty = IsEmptyState(from); - if (fromIsEmpty) { - from.unmatched.insert(mStartState); - } - Y_ASSERT(IsValidState(from)); - - SplitDestinations(next.matched, next.unmatched, next.separated, from.unmatched, letter); - if (next.matched.empty() && !next.separated.empty()) { - if (next.unmatched.empty()) { - SplitSeparatedByFsmTag(next); - if (next.separated.size() > 1) { - RemoveDuplicateSeparatedStates(next); - } - if (next.unmatched.empty()) { - next.unmatched.swap(next.separated); - } - } else { - ChooseOneSeparatedState(next); - } - } - if (next.matched.empty() && next.separated.empty() && !from.separated.empty()) { - if (!next.unmatched.empty()) { - ChooseOneDestState(next.separated, from.separated, letter); - } else { - SplitDestinations(next.matched, next.unmatched, next.separated, from.separated, letter); - if (next.matched.empty() && !next.separated.empty()) { - SplitSeparatedByFsmTag(next); - } - } - ChooseOneSeparatedState(next); - } - if (!fromIsEmpty && IsEmptyState(next)) { - ChooseOneDestState(next.lagging, StateGroup{mStartState}, letter); - } - - return next; - } - - void PostProcessNextState(State& next) const override { - if (!next.lagging.empty()) { - next.unmatched.swap(next.lagging); - } - UpdateLaggingStates(next, false); - RemoveDuplicateSeparatedStates(next); - } - - Action CalculateTransitionTag(const State& source, const State& dest) const override { - Action tag = CalculateTransitionTagImpl(dest); - if (!((TagsOfGroup(source.unmatched) | TagsOfGroup(source.separated)) & MixedTags)) { - tag &= AdvancedCountingScanner::IncrementAction; - } - return tag; - } - - StateGroup InitialGroup() const override { - return StateGroup{}; - } - - bool IsEmptyState(const State& state) const { - return state.matched.empty() && state.unmatched.empty() && state.separated.empty() && state.lagging.empty(); - } - - bool IsValidState(const State& state) const { - return state.matched.empty() && state.unmatched.size() <= 1 && state.separated.size() <= 1 && state.lagging.empty(); - } - - void SplitSeparatedByFsmTag(State& state) const { - Y_ASSERT(state.unmatched.empty()); - StateGroup separated; - separated.swap(state.separated); - SplitGroupByTag(state.matched, state.unmatched, state.separated, separated, true); - } - - void ChooseOneDestState(StateGroup& dest, const StateGroup& source, Char letter) const { - State destState; - SplitDestinations(destState.matched, destState.unmatched, destState.separated, source, letter); - if (!destState.matched.empty()) { - dest.swap(destState.matched); - } else if (!destState.separated.empty()) { - dest.swap(destState.separated); - } else if (!destState.unmatched.empty()) { - dest.swap(destState.unmatched); - } - } - - void ChooseOneSeparatedState(State& state) const { - if (state.separated.size() <= 1) { - return; - } - RemoveDuplicateSeparatedStates(state); - State splitted; - SplitGroupByTag(splitted.matched, splitted.unmatched, splitted.separated, state.separated, true); - if (!splitted.separated.empty()) { - state.separated.swap(splitted.separated); - } else if (!splitted.matched.empty()) { - state.separated.swap(splitted.matched); - } - } - -private: - TaggedState mStartState; -}; - -bool CountingFsm::Determine() { - CountingFsmDetermineTask task{mFsm, mReInitial}; - size_t maxSize = mFsm.Size() * 4096; - if (Pire::Impl::Determine(task, maxSize)) { - SwapTaskOutputs(task); - mSimple = false; - } else { - SimpleCountingFsmDetermineTask simpleTask{mFsm, mReInitial}; - if (Pire::Impl::Determine(simpleTask, std::numeric_limits<size_t>::max())) { - SwapTaskOutputs(simpleTask); - mSimple = true; - } else { - return false; - } - } - return true; -} - -void CountingFsm::Minimize() { - CountingFsmMinimizeTask task{*this}; - Pire::Impl::Minimize(task); - SwapTaskOutputs(task); -} - -void CountingFsm::SwapTaskOutputs(CountingFsmTask& task) { - task.Output().Swap(mDetermined); - task.Actions().swap(mActions); -} - -} - + } + } + } + + const LettersTbl& Letters() const { + return mFsm.Letters(); + } + + State Initial() const { + return State{StateGroup{}, InitialGroup(), StateGroup{}, StateGroup{}}; + } + + bool IsRequired(const State& state) const { + Y_UNUSED(state); + return true; + } + + State Next(const State& state, Char letter) const { + if (mInvalidLetters.count(letter) != 0) { + AddAction(state, letter, CountingFsm::NotMatched); + return Initial(); + } + + auto next = PrepareNextState(state, letter); + AddAction(state, letter, CalculateTransitionTag(state, next)); + PostProcessNextState(next); + NormalizeState(next); + + return next; + } + + void AcceptStates(const TVector<State>& states) + { + ResizeOutput(states.size()); + auto& newFsm = Output(); + auto& newActions = Actions(); + newFsm.SetInitial(0); + newFsm.SetIsDetermined(true); + + for (size_t ns = 0; ns < states.size(); ++ns) { + const auto& state = states[ns]; + newFsm.SetFinal(ns, HasFinals(state.unmatched)); + + auto outputIt = mActionByState.find(state); + if (outputIt != mActionByState.end()) { + newActions[ns].swap(outputIt->second); + } + } + } + +protected: + void SplitDestinations(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const StateGroup& source, Char letter) const { + for (const auto& state : source) { + MakeTaggedStates(matched, unmatched, separated, mFsm.Destinations(state.first, letter), state.second); + if (mFsm.IsFinal(state.first)) { + // Implicit epsilon transitions from final states to reInitial after matching separator + MakeTaggedStates(separated, separated, separated, mFsm.Destinations(mReInitial, letter), CountingFsm::Separated); + } + } + } + + Action CalculateTransitionTagImpl(const State& dest) const { + Action result = 0; + if (!dest.matched.empty()) { + result = AdvancedCountingScanner::IncrementAction; + } else if (dest.unmatched.empty()) { + if (!dest.separated.empty()) { + for (const auto& state : dest.separated) { + if (state.second == CountingFsm::Matched) { + result = AdvancedCountingScanner::IncrementAction; + } + } + } else { + result = AdvancedCountingScanner::ResetAction; + for (const auto& state : dest.lagging) { + if (state.second != CountingFsm::NotMatched) { + result |= AdvancedCountingScanner::IncrementAction; + } + } + } + } + return result; + } + + unsigned long TagsOfGroup(const StateGroup& group) const { + unsigned long result = 0; + for (const auto& state : group) { + result |= state.second; + } + return result; + } + + void SplitGroupByTag(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const StateGroup& source, bool useFsmTag) const { + for (const auto& state : source) { + auto tag = useFsmTag ? mFsm.Tag(state.first) : state.second; + if (tag == CountingFsm::Matched) { + matched.insert(state); + } else if (tag == CountingFsm::Separated) { + separated.insert(state); + } else { + unmatched.insert(state); + } + } + } + + void UpdateLaggingStates(State& state, bool moveToLagging) const { + if (!state.matched.empty()) { + if (moveToLagging) { + state.lagging.insert(state.unmatched.cbegin(), state.unmatched.cend()); + state.lagging.insert(state.separated.cbegin(), state.separated.cend()); + } + state.unmatched.clear(); + state.separated.clear(); + } + if (state.unmatched.empty() && !state.separated.empty()) { + const auto unmatchedTags = TagsOfGroup(state.separated); + if ((unmatchedTags & CountingFsm::Matched) && (unmatchedTags != CountingFsm::Matched)) { + StateGroup separatedMatched; + for (const auto& separatedState : state.separated) { + if (separatedState.second == CountingFsm::Matched) { + separatedMatched.insert(separatedState); + } else if (moveToLagging) { + state.lagging.insert(separatedState); + } + } + state.separated.swap(separatedMatched); + } + } + } + + void RemoveDuplicateLaggingStates(State& state) const { + const auto statesToRemove = GetRawStates({state.matched, state.unmatched, state.separated}, 0); + const auto unmatchedStatesToRemove = GetRawStates({state.lagging}, CountingFsm::NotMatched); + + StateGroup newLagging; + for (const auto& taggedState : state.lagging) { + if (statesToRemove.count(taggedState.first) == 0) { + if (taggedState.second != CountingFsm::NotMatched || unmatchedStatesToRemove.count(taggedState.first) == 0) { + newLagging.insert(taggedState); + } + } + } + state.lagging.swap(newLagging); + } + + void RemoveDuplicateSeparatedStates(State& state) const { + if (state.separated.empty()) { + return; + } + const auto statesToRemove = GetRawStates({state.matched, state.unmatched}, 0); + RemoveRawStates(state.separated, statesToRemove); + } + + void NormalizeState(State& state) const { + if (!state.matched.empty()) { + Y_ASSERT(state.unmatched.empty()); + state.unmatched.swap(state.matched); + } + + if (state.unmatched.empty() && !state.separated.empty()) { + state.unmatched.swap(state.separated); + } + + if (state.unmatched.empty() && !state.lagging.empty()) { + State groups; + SplitGroupByTag(groups.matched, groups.unmatched, groups.separated, state.lagging, false); + if (!groups.matched.empty()) { + state.unmatched.swap(groups.matched); + state.separated.swap(groups.separated); + state.lagging.swap(groups.unmatched); + } else if (!groups.separated.empty()) { + state.unmatched.swap(groups.separated); + state.lagging.swap(groups.unmatched); + } else { + state.unmatched.swap(groups.unmatched); + state.lagging.swap(groups.matched); // just clear + } + } + } + +private: + virtual State PrepareNextState(const State& state, Char letter) const = 0; + + virtual void PostProcessNextState(State& next) const = 0; + + virtual Action CalculateTransitionTag(const State& source, const State& dest) const { + Y_UNUSED(source); + return CalculateTransitionTagImpl(dest); + } + + virtual StateGroup InitialGroup() const { + return StateGroup{TaggedState{mFsm.Initial(), CountingFsm::NotMatched}}; + } + + void AddAction(State from, Char letter, unsigned long value) const { + if (!value) { + return; + } + TransitionTagRow& row = mActionByState[from]; + row[letter] = value; + } + + void MakeTaggedStates(StateGroup& matched, StateGroup& unmatched, StateGroup& separated, const Fsm::StatesSet& destinations, unsigned long sourceTag) const { + for (const auto destState : destinations) { + if (mDeadStates.count(destState) == 0) { + const auto destTag = mFsm.Tag(destState); + if (sourceTag != CountingFsm::Matched && destTag == CountingFsm::Matched) { + matched.insert(ymake_pair(destState, destTag)); + } else if (sourceTag == CountingFsm::Separated || destTag == CountingFsm::Separated) { + separated.insert(ymake_pair(destState, CountingFsm::Separated)); + } else { + unmatched.insert(ymake_pair(destState, sourceTag)); + } + } + } + } + + bool HasFinals(const StateGroup& states) const { + for (const auto& state : states) { + if (mFsm.IsFinal(state.first)) { + return true; + } + } + return false; + } + + Fsm::StatesSet GetRawStates(const TVector<std::reference_wrapper<const StateGroup>> groups, unsigned long excludedTags) const { + Fsm::StatesSet result; + for (const auto& group : groups) { + for (const auto& taggedState : group.get()) { + if (!(taggedState.second & excludedTags)) { + result.insert(taggedState.first); + } + } + } + return result; + } + + void RemoveRawStates(StateGroup& group, const Fsm::StatesSet& states) const { + StateGroup removing; + for (const auto& taggedState : group) { + if (states.count(taggedState.first) != 0) { + removing.insert(taggedState); + } + } + for (const auto& taggedState : removing) { + group.erase(taggedState); + } + } + +private: + const Fsm& mFsm; + RawState mReInitial; + Fsm::StatesSet mDeadStates; + TSet<Char> mInvalidLetters; + + mutable TMap<State, TransitionTagRow> mActionByState; +}; + +class CountingFsmDetermineTask : public BasicCountingFsmDetermineTask { +public: + using BasicCountingFsmDetermineTask::State; + using BasicCountingFsmDetermineTask::LettersTbl; + using BasicCountingFsmDetermineTask::InvStates; + + explicit CountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) + : BasicCountingFsmDetermineTask{fsm, reInitial} + {} + +private: + State PrepareNextState(const State& state, Char letter) const override { + State next; + SplitDestinations(next.matched, next.unmatched, next.separated, state.unmatched, letter); + SplitDestinations(next.separated, next.separated, next.separated, state.separated, letter); + SplitDestinations(next.lagging, next.lagging, next.lagging, state.lagging, letter); + return next; + } + + void PostProcessNextState(State& next) const override { + UpdateLaggingStates(next, true); + RemoveDuplicateLaggingStates(next); + RemoveDuplicateSeparatedStates(next); + } +}; + +class SimpleCountingFsmDetermineTask : public BasicCountingFsmDetermineTask { +public: + using BasicCountingFsmDetermineTask::State; + using BasicCountingFsmDetermineTask::LettersTbl; + using BasicCountingFsmDetermineTask::InvStates; + + static constexpr unsigned long MixedTags = CountingFsm::Separated | CountingFsm::Matched; + + SimpleCountingFsmDetermineTask(const Fsm& fsm, RawState reInitial) + : BasicCountingFsmDetermineTask{fsm, reInitial} + , mStartState{reInitial, CountingFsm::NotMatched} + {} + +private: + State PrepareNextState(const State& state, Char letter) const override { + State next; + auto from = state; + const auto fromIsEmpty = IsEmptyState(from); + if (fromIsEmpty) { + from.unmatched.insert(mStartState); + } + Y_ASSERT(IsValidState(from)); + + SplitDestinations(next.matched, next.unmatched, next.separated, from.unmatched, letter); + if (next.matched.empty() && !next.separated.empty()) { + if (next.unmatched.empty()) { + SplitSeparatedByFsmTag(next); + if (next.separated.size() > 1) { + RemoveDuplicateSeparatedStates(next); + } + if (next.unmatched.empty()) { + next.unmatched.swap(next.separated); + } + } else { + ChooseOneSeparatedState(next); + } + } + if (next.matched.empty() && next.separated.empty() && !from.separated.empty()) { + if (!next.unmatched.empty()) { + ChooseOneDestState(next.separated, from.separated, letter); + } else { + SplitDestinations(next.matched, next.unmatched, next.separated, from.separated, letter); + if (next.matched.empty() && !next.separated.empty()) { + SplitSeparatedByFsmTag(next); + } + } + ChooseOneSeparatedState(next); + } + if (!fromIsEmpty && IsEmptyState(next)) { + ChooseOneDestState(next.lagging, StateGroup{mStartState}, letter); + } + + return next; + } + + void PostProcessNextState(State& next) const override { + if (!next.lagging.empty()) { + next.unmatched.swap(next.lagging); + } + UpdateLaggingStates(next, false); + RemoveDuplicateSeparatedStates(next); + } + + Action CalculateTransitionTag(const State& source, const State& dest) const override { + Action tag = CalculateTransitionTagImpl(dest); + if (!((TagsOfGroup(source.unmatched) | TagsOfGroup(source.separated)) & MixedTags)) { + tag &= AdvancedCountingScanner::IncrementAction; + } + return tag; + } + + StateGroup InitialGroup() const override { + return StateGroup{}; + } + + bool IsEmptyState(const State& state) const { + return state.matched.empty() && state.unmatched.empty() && state.separated.empty() && state.lagging.empty(); + } + + bool IsValidState(const State& state) const { + return state.matched.empty() && state.unmatched.size() <= 1 && state.separated.size() <= 1 && state.lagging.empty(); + } + + void SplitSeparatedByFsmTag(State& state) const { + Y_ASSERT(state.unmatched.empty()); + StateGroup separated; + separated.swap(state.separated); + SplitGroupByTag(state.matched, state.unmatched, state.separated, separated, true); + } + + void ChooseOneDestState(StateGroup& dest, const StateGroup& source, Char letter) const { + State destState; + SplitDestinations(destState.matched, destState.unmatched, destState.separated, source, letter); + if (!destState.matched.empty()) { + dest.swap(destState.matched); + } else if (!destState.separated.empty()) { + dest.swap(destState.separated); + } else if (!destState.unmatched.empty()) { + dest.swap(destState.unmatched); + } + } + + void ChooseOneSeparatedState(State& state) const { + if (state.separated.size() <= 1) { + return; + } + RemoveDuplicateSeparatedStates(state); + State splitted; + SplitGroupByTag(splitted.matched, splitted.unmatched, splitted.separated, state.separated, true); + if (!splitted.separated.empty()) { + state.separated.swap(splitted.separated); + } else if (!splitted.matched.empty()) { + state.separated.swap(splitted.matched); + } + } + +private: + TaggedState mStartState; +}; + +bool CountingFsm::Determine() { + CountingFsmDetermineTask task{mFsm, mReInitial}; + size_t maxSize = mFsm.Size() * 4096; + if (Pire::Impl::Determine(task, maxSize)) { + SwapTaskOutputs(task); + mSimple = false; + } else { + SimpleCountingFsmDetermineTask simpleTask{mFsm, mReInitial}; + if (Pire::Impl::Determine(simpleTask, std::numeric_limits<size_t>::max())) { + SwapTaskOutputs(simpleTask); + mSimple = true; + } else { + return false; + } + } + return true; +} + +void CountingFsm::Minimize() { + CountingFsmMinimizeTask task{*this}; + Pire::Impl::Minimize(task); + SwapTaskOutputs(task); +} + +void CountingFsm::SwapTaskOutputs(CountingFsmTask& task) { + task.Output().Swap(mDetermined); + task.Actions().swap(mActions); +} + +} + namespace { Pire::Fsm FsmForDot() { Pire::Fsm f; f.AppendDot(); return f; } Pire::Fsm FsmForChar(Pire::Char c) { Pire::Fsm f; f.AppendSpecial(c); return f; } @@ -840,32 +840,32 @@ CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep) namespace Impl { template <class AdvancedScanner> AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) { - Impl::CountingFsm countingFsm{re, sep}; - if (!countingFsm.Determine()) { - throw Error("regexp pattern too complicated"); - } - countingFsm.Minimize(); - if (simple) { - *simple = countingFsm.Simple(); - } - - const auto& determined = countingFsm.Determined(); - const auto& letters = countingFsm.Letters(); - + Impl::CountingFsm countingFsm{re, sep}; + if (!countingFsm.Determine()) { + throw Error("regexp pattern too complicated"); + } + countingFsm.Minimize(); + if (simple) { + *simple = countingFsm.Simple(); + } + + const auto& determined = countingFsm.Determined(); + const auto& letters = countingFsm.Letters(); + AdvancedScanner scanner; scanner.Init(determined.Size(), letters, determined.Initial(), 1); - for (size_t from = 0; from != determined.Size(); ++from) { + for (size_t from = 0; from != determined.Size(); ++from) { for (auto&& lettersEl : letters) { const auto letter = lettersEl.first; - const auto& tos = determined.Destinations(from, letter); - Y_ASSERT(tos.size() == 1); + const auto& tos = determined.Destinations(from, letter); + Y_ASSERT(tos.size() == 1); scanner.SetJump(from, letter, *tos.begin(), scanner.RemapAction(countingFsm.Output(from, letter))); - } - } + } + } return scanner; -} +} } // namespace Impl - + AdvancedCountingScanner::AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) : AdvancedCountingScanner(Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(re, sep, simple)) { @@ -879,16 +879,16 @@ NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& namespace Impl { -template<class Scanner> -class CountingScannerGlueTask: public ScannerGlueCommon<Scanner> { +template<class Scanner> +class CountingScannerGlueTask: public ScannerGlueCommon<Scanner> { public: - using typename ScannerGlueCommon<Scanner>::State; - using TAction = typename Scanner::Action; - using InternalState = typename Scanner::InternalState; + using typename ScannerGlueCommon<Scanner>::State; + using TAction = typename Scanner::Action; + using InternalState = typename Scanner::InternalState; typedef TMap<State, size_t> InvStates; - CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs) - : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters)) + CountingScannerGlueTask(const Scanner& lhs, const Scanner& rhs) + : ScannerGlueCommon<Scanner>(lhs, rhs, LettersEquality<Scanner>(lhs.m_letters, rhs.m_letters)) { } @@ -896,25 +896,25 @@ public: { States = states; this->SetSc(THolder<Scanner>(new Scanner)); - this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount()); + this->Sc().Init(states.size(), this->Letters(), 0, this->Lhs().RegexpsCount() + this->Rhs().RegexpsCount()); for (size_t i = 0; i < states.size(); ++i) - this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3)); + this->Sc().SetTag(i, this->Lhs().m_tags[this->Lhs().StateIdx(states[i].first)] | (this->Rhs().m_tags[this->Rhs().StateIdx(states[i].second)] << 3)); } void Connect(size_t from, size_t to, Char letter) { - this->Sc().SetJump(from, letter, to, - Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount())); + this->Sc().SetJump(from, letter, to, + Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount())); } protected: TVector<State> States; - TAction Action(const Scanner& sc, InternalState state, Char letter) const + TAction Action(const Scanner& sc, InternalState state, Char letter) const { size_t state_index = sc.StateIdx(state); size_t transition_index = sc.TransitionIndex(state_index, letter); - const auto& tr = sc.m_jumps[transition_index]; + const auto& tr = sc.m_jumps[transition_index]; return tr.action; } }; @@ -988,26 +988,26 @@ CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const Counting return CountingScanner(); } static constexpr size_t DefMaxSize = 250000; - Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs); + Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); } -AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */) -{ +AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */) +{ if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { return AdvancedCountingScanner(); } static constexpr size_t DefMaxSize = 250000; - Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs); - return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); + Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs); + return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); } - + NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */) { static constexpr size_t DefMaxSize = 250000; Impl::NoGlueLimitCountingScannerGlueTask task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); -} +} // Should Save(), Load() and Mmap() functions return stream/pointer in aligned state? // Now they don't because tests don't require it. diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h index bd1526b98d..8ec9fc0ebc 100644 --- a/contrib/libs/pire/pire/extra/count.h +++ b/contrib/libs/pire/pire/extra/count.h @@ -33,11 +33,11 @@ namespace Pire { class Fsm; namespace Impl { - template<class T> - class ScannerGlueCommon; + template<class T> + class ScannerGlueCommon; - template<class T> - class CountingScannerGlueTask; + template<class T> + class CountingScannerGlueTask; class NoGlueLimitCountingScannerGlueTask; @@ -116,7 +116,7 @@ public: * in input text. */ template<class DerivedScanner, class State> -class BaseCountingScanner: public LoadedScanner { +class BaseCountingScanner: public LoadedScanner { public: enum { IncrementAction = 1, @@ -169,13 +169,13 @@ public: bool Dead(const State&) const { return false; } - using LoadedScanner::Swap; + using LoadedScanner::Swap; size_t StateIndex(const State& s) const { return StateIdx(s.m_state); } -protected: +protected: using LoadedScanner::Init; - using LoadedScanner::InternalState; + using LoadedScanner::InternalState; template<size_t ActualReCount> void PerformIncrement(State& s, Action mask) const @@ -201,11 +201,11 @@ protected: Transition x = reinterpret_cast<const Transition*>(s)[Translate(c)]; s += SignExtend(x.shift); } -}; +}; template <size_t MAX_RE_COUNT> class CountingState { -public: +public: size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } private: using InternalState = LoadedScanner::InternalState; @@ -238,26 +238,26 @@ private: class CountingScanner : public BaseCountingScanner<CountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { public: using State = CountingState<MAX_RE_COUNT>; - enum { - Matched = 2, - }; - - CountingScanner() {} - CountingScanner(const Fsm& re, const Fsm& sep); - - static CountingScanner Glue(const CountingScanner& a, const CountingScanner& b, size_t maxSize = 0); - - template<size_t ActualReCount> - PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - void TakeActionImpl(State& s, Action a) const - { - if (a & IncrementMask) - PerformIncrement<ActualReCount>(s, a); - if (a & ResetMask) - PerformReset<ActualReCount>(s, a); - } - -private: + enum { + Matched = 2, + }; + + CountingScanner() {} + CountingScanner(const Fsm& re, const Fsm& sep); + + static CountingScanner Glue(const CountingScanner& a, const CountingScanner& b, size_t maxSize = 0); + + template<size_t ActualReCount> + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION + void TakeActionImpl(State& s, Action a) const + { + if (a & IncrementMask) + PerformIncrement<ActualReCount>(s, a); + if (a & ResetMask) + PerformReset<ActualReCount>(s, a); + } + +private: Action RemapAction(Action action) { if (action == (Matched | DeadFlag)) @@ -270,48 +270,48 @@ private: friend void BuildScanner<CountingScanner>(const Fsm&, CountingScanner&); friend class Impl::ScannerGlueCommon<CountingScanner>; - friend class Impl::CountingScannerGlueTask<CountingScanner>; + friend class Impl::CountingScannerGlueTask<CountingScanner>; }; class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { -public: +public: using State = CountingState<MAX_RE_COUNT>; - AdvancedCountingScanner() {} - AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr); - - static AdvancedCountingScanner Glue(const AdvancedCountingScanner& a, const AdvancedCountingScanner& b, size_t maxSize = 0); - - template<size_t ActualReCount> - PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - void TakeActionImpl(State& s, Action a) const - { - if (a & ResetMask) { - PerformReset<ActualReCount>(s, a); - } - if (a & IncrementMask) { - PerformIncrement<ActualReCount>(s, a); - } - } - -private: - Action RemapAction(Action action) - { - Action result = 0; - if (action & ResetAction) { - result = 1 << MAX_RE_COUNT; - } - if (action & IncrementAction) { - result |= 1; - } - return result; - } - - friend class Impl::ScannerGlueCommon<AdvancedCountingScanner>; - friend class Impl::CountingScannerGlueTask<AdvancedCountingScanner>; + AdvancedCountingScanner() {} + AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr); + + static AdvancedCountingScanner Glue(const AdvancedCountingScanner& a, const AdvancedCountingScanner& b, size_t maxSize = 0); + + template<size_t ActualReCount> + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION + void TakeActionImpl(State& s, Action a) const + { + if (a & ResetMask) { + PerformReset<ActualReCount>(s, a); + } + if (a & IncrementMask) { + PerformIncrement<ActualReCount>(s, a); + } + } + +private: + Action RemapAction(Action action) + { + Action result = 0; + if (action & ResetAction) { + result = 1 << MAX_RE_COUNT; + } + if (action & IncrementAction) { + result |= 1; + } + return result; + } + + friend class Impl::ScannerGlueCommon<AdvancedCountingScanner>; + friend class Impl::CountingScannerGlueTask<AdvancedCountingScanner>; friend AdvancedCountingScanner Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(const Fsm&, const Fsm&, bool*); -}; - +}; + class NoGlueLimitCountingState { public: size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp index 984d708dfa..4dd1fbcc17 100644 --- a/contrib/libs/pire/pire/fsm.cpp +++ b/contrib/libs/pire/pire/fsm.cpp @@ -38,7 +38,7 @@ #include "vbitset.h" #include "partition.h" #include "determine.h" -#include "minimize.h" +#include "minimize.h" #include "platform.h" namespace Pire { @@ -1034,128 +1034,128 @@ bool Fsm::Determine(size_t maxsize /* = 0 */) return false; } -namespace Impl { -class FsmMinimizeTask { +namespace Impl { +class FsmMinimizeTask { public: - explicit FsmMinimizeTask(const Fsm& fsm) - : mFsm(fsm) - , reversedTransitions(fsm.Size()) - , StateClass(fsm.Size()) - , Classes(0) - { - Y_ASSERT(mFsm.IsDetermined()); - - TMap<bool, size_t> FinalStateClassMap; - - for (size_t state = 0; state < mFsm.Size(); ++state) { - reversedTransitions[state].resize(mFsm.Letters().Size()); - if (FinalStateClassMap.find(mFsm.IsFinal(state)) == FinalStateClassMap.end()) { - FinalStateClassMap[mFsm.IsFinal(state)] = Classes++; - } - StateClass[state] = FinalStateClassMap[mFsm.IsFinal(state)]; - } - - for (size_t state = 0; state < mFsm.Size(); ++state) { - TSet<ypair<Char, size_t>> usedTransitions; - for (const auto& transition : mFsm.m_transitions[state]) { - Y_ASSERT(transition.second.size() == 1); - auto destination = *transition.second.begin(); - auto letter = mFsm.Letters().Index(transition.first); - if (usedTransitions.find(ymake_pair(letter, destination)) == usedTransitions.end()) { - usedTransitions.insert(ymake_pair(letter, destination)); - reversedTransitions[destination][letter].push_back(state); - } - } - } - } - - TVector<size_t>& GetStateClass() { return StateClass; } - - size_t& GetClassesNumber() { return Classes; } - - size_t LettersCount() const { - return mFsm.Letters().Size(); + explicit FsmMinimizeTask(const Fsm& fsm) + : mFsm(fsm) + , reversedTransitions(fsm.Size()) + , StateClass(fsm.Size()) + , Classes(0) + { + Y_ASSERT(mFsm.IsDetermined()); + + TMap<bool, size_t> FinalStateClassMap; + + for (size_t state = 0; state < mFsm.Size(); ++state) { + reversedTransitions[state].resize(mFsm.Letters().Size()); + if (FinalStateClassMap.find(mFsm.IsFinal(state)) == FinalStateClassMap.end()) { + FinalStateClassMap[mFsm.IsFinal(state)] = Classes++; + } + StateClass[state] = FinalStateClassMap[mFsm.IsFinal(state)]; + } + + for (size_t state = 0; state < mFsm.Size(); ++state) { + TSet<ypair<Char, size_t>> usedTransitions; + for (const auto& transition : mFsm.m_transitions[state]) { + Y_ASSERT(transition.second.size() == 1); + auto destination = *transition.second.begin(); + auto letter = mFsm.Letters().Index(transition.first); + if (usedTransitions.find(ymake_pair(letter, destination)) == usedTransitions.end()) { + usedTransitions.insert(ymake_pair(letter, destination)); + reversedTransitions[destination][letter].push_back(state); + } + } + } + } + + TVector<size_t>& GetStateClass() { return StateClass; } + + size_t& GetClassesNumber() { return Classes; } + + size_t LettersCount() const { + return mFsm.Letters().Size(); + } + + bool IsDetermined() const { + return mFsm.IsDetermined(); + } + + size_t Size() const { + return mFsm.Size(); } - bool IsDetermined() const { - return mFsm.IsDetermined(); + const TVector<size_t>& Previous(size_t state, size_t letter) const { + return reversedTransitions[state][letter]; } - size_t Size() const { - return mFsm.Size(); - } - - const TVector<size_t>& Previous(size_t state, size_t letter) const { - return reversedTransitions[state][letter]; - } + void AcceptStates() { + mNewFsm.Resize(Classes); + mNewFsm.letters = mFsm.letters; + mNewFsm.determined = mFsm.determined; + mNewFsm.m_sparsed = mFsm.m_sparsed; + mNewFsm.SetFinal(0, false); - void AcceptStates() { - mNewFsm.Resize(Classes); - mNewFsm.letters = mFsm.letters; - mNewFsm.determined = mFsm.determined; - mNewFsm.m_sparsed = mFsm.m_sparsed; - mNewFsm.SetFinal(0, false); - - // Unite equality classes into new states - size_t fromIdx = 0; + // Unite equality classes into new states + size_t fromIdx = 0; for (auto from = mFsm.m_transitions.begin(), fromEnd = mFsm.m_transitions.end(); from != fromEnd; ++from, ++fromIdx) { - size_t dest = StateClass[fromIdx]; - PIRE_IFDEBUG(Cdbg << "[min] State " << fromIdx << " becomes state " << dest << Endl); + size_t dest = StateClass[fromIdx]; + PIRE_IFDEBUG(Cdbg << "[min] State " << fromIdx << " becomes state " << dest << Endl); for (auto&& letter : *from) { Y_ASSERT(letter.second.size() == 1 || !"FSM::minimize(): FSM not deterministic"); - mNewFsm.Connect(dest, StateClass[*letter.second.begin()], letter.first); - } - if (mFsm.IsFinal(fromIdx)) { - mNewFsm.SetFinal(dest, true); - PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " becomes final because of old state " << fromIdx << Endl); - } - - // Append tags + mNewFsm.Connect(dest, StateClass[*letter.second.begin()], letter.first); + } + if (mFsm.IsFinal(fromIdx)) { + mNewFsm.SetFinal(dest, true); + PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " becomes final because of old state " << fromIdx << Endl); + } + + // Append tags auto ti = mFsm.tags.find(fromIdx); - if (ti != mFsm.tags.end()) { - mNewFsm.tags[dest] |= ti->second; - PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " carries tag " << ti->second << " because of old state " << fromIdx << Endl); - } + if (ti != mFsm.tags.end()) { + mNewFsm.tags[dest] |= ti->second; + PIRE_IFDEBUG(Cdbg << "[min] New state " << dest << " carries tag " << ti->second << " because of old state " << fromIdx << Endl); + } } - mNewFsm.initial = StateClass[mFsm.initial]; - - // Restore outputs + mNewFsm.initial = StateClass[mFsm.initial]; + + // Restore outputs for (auto&& output : mFsm.outputs) for (auto&& output2 : output.second) - mNewFsm.outputs[StateClass[output.first]].insert(ymake_pair(StateClass[output2.first], output2.second)); + mNewFsm.outputs[StateClass[output.first]].insert(ymake_pair(StateClass[output2.first], output2.second)); } - typedef bool Result; + typedef bool Result; - Result Success() { - return true; - } + Result Success() { + return true; + } - Result Failure() { - return false; - } + Result Failure() { + return false; + } - Fsm& Output() { - return mNewFsm; + Fsm& Output() { + return mNewFsm; } -private: - const Fsm& mFsm; - Fsm mNewFsm; - TVector<TVector<TVector<size_t>>> reversedTransitions; - TVector<size_t> StateClass; - size_t Classes; -}; -} - -void Fsm::Minimize() -{ - // Minimization algorithm is only applicable to a determined FSM. - Y_ASSERT(determined); - - Impl::FsmMinimizeTask task{*this}; - if (Pire::Impl::Minimize(task)) { - task.Output().Swap(*this); +private: + const Fsm& mFsm; + Fsm mNewFsm; + TVector<TVector<TVector<size_t>>> reversedTransitions; + TVector<size_t> StateClass; + size_t Classes; +}; +} + +void Fsm::Minimize() +{ + // Minimization algorithm is only applicable to a determined FSM. + Y_ASSERT(determined); + + Impl::FsmMinimizeTask task{*this}; + if (Pire::Impl::Minimize(task)) { + task.Output().Swap(*this); } } diff --git a/contrib/libs/pire/pire/fsm.h b/contrib/libs/pire/pire/fsm.h index 4dad06ca06..c2ae1836e8 100644 --- a/contrib/libs/pire/pire/fsm.h +++ b/contrib/libs/pire/pire/fsm.h @@ -34,7 +34,7 @@ namespace Pire { namespace Impl { class FsmDetermineTask; - class FsmMinimizeTask; + class FsmMinimizeTask; class HalfFinalDetermineTask; } @@ -245,7 +245,7 @@ namespace Pire { void ClearHints() { isAlternative = false; } friend class Impl::FsmDetermineTask; - friend class Impl::FsmMinimizeTask; + friend class Impl::FsmMinimizeTask; friend class Impl::HalfFinalDetermineTask; }; diff --git a/contrib/libs/pire/pire/minimize.h b/contrib/libs/pire/pire/minimize.h index d58c5ce79e..4f42688e6f 100644 --- a/contrib/libs/pire/pire/minimize.h +++ b/contrib/libs/pire/pire/minimize.h @@ -1,153 +1,153 @@ -#ifndef PIRE_MINIMIZE_H -#define PIRE_MINIMIZE_H - -#include "stub/stl.h" -#include "partition.h" - -namespace Pire { - namespace Impl { - - /** - * An interface of a minimization task. - * You don't have to derive from this class; it is just a start point template. - */ - class MinimizeTask { - private: - struct ImplementationSpecific1; - - public: - // States must be represented by size_t. - - /// States must be initially divided into some equivalence classes. - /// If states are in the same equivalence class, they may be merged without loosing state specific info. - /// Equivalence classes must have indexes from 0 to (Classes - 1). - /// The algorithm will modify equivalent classes and in the end - /// all states in the same equivalent class can be merged into one state - TVector<size_t>& GetStateClass() { return StateClass; } - - /// Returns number of equivalent classes - size_t& GetClassesNumber() { return Classes; } - - /// Should return number of letter classes - size_t LettersCount() const; - - /// Should return true if FSM is determined. - bool IsDetermined() const; - - /// Should return number of states. - size_t Size() const; - - /// Should calculate vector of previous states by, given the current state and incoming letter class index. - const TVector<size_t>& Previous(size_t state, size_t letter) const; - - /// Called when states equivalent classes are formed, and written in StateClass. - void AcceptStates(); - - typedef bool Result; - - Result Success() { return true; } - - Result Failure() { return false; } - - private: - TVector<size_t> StateClass; - - size_t Classes; - }; - - // Minimizes Determined FSM using Hopcroft algorithm, works in O(Size * log(Size) * MaxChar) time, - // requires O(Size * MaxChar * sizof(size_t)) memory. - template<class Task> - typename Task::Result Minimize(Task& task) - { - // Minimization algorithm is only applicable to a determined FSM. - if (!task.IsDetermined()) { - return task.Failure(); - } - - typedef ypair<size_t, size_t> ClassLetter; - - TVector<ybitset<MaxChar>> queuedClasses(task.Size()); - - TDeque<ClassLetter> classesToProcess; - - TVector<TVector<size_t>> classStates(task.Size()); - - TVector<size_t>& stateClass = task.GetStateClass(); - - for (size_t state = 0; state < task.Size(); ++state) { - classStates[stateClass[state]].push_back(state); - } - - for (size_t classIndex = 0; classIndex < task.GetClassesNumber(); ++classIndex) { - for (size_t letter = 0; letter < task.LettersCount(); ++letter) { - classesToProcess.push_back(ymake_pair(classIndex, letter)); - queuedClasses[classIndex][letter] = 1; - } - } - - TVector<size_t> classChange(task.Size()); - TVector<TVector<size_t>> removedStates(task.Size()); - - while (classesToProcess.size()) { - const auto currentClass = classesToProcess.front().first; - const auto currentLetter = classesToProcess.front().second; - classesToProcess.pop_front(); - queuedClasses[currentClass][currentLetter] = 0; - TVector<size_t> splittedClasses; - - for (const auto& classState : classStates[currentClass]) { - for (const auto& state: task.Previous(classState, currentLetter)) { - if (classChange[stateClass[state]] != task.GetClassesNumber()) { - classChange[stateClass[state]] = task.GetClassesNumber(); - splittedClasses.push_back(stateClass[state]); - } - removedStates[stateClass[state]].push_back(state); - } - } - - - for (const auto& splittedClass : splittedClasses) { - if (removedStates[splittedClass].size() == classStates[splittedClass].size()) { - classChange[splittedClass] = 0; - removedStates[splittedClass].clear(); - continue; - } - - const auto newClass = task.GetClassesNumber()++; - classChange[splittedClass] = newClass; - std::swap(classStates[newClass], removedStates[splittedClass]); - for (const auto& state : classStates[newClass]) { - stateClass[state] = newClass; - } - - auto iter = classStates[splittedClass].begin(); - for (const auto state : classStates[splittedClass]) { - if (stateClass[state] == splittedClass) { - *iter = state; - ++iter; - } - } - classStates[splittedClass].erase(iter, classStates[splittedClass].end()); - - for (size_t letter = 0; letter < task.LettersCount(); ++letter) { - if (queuedClasses[splittedClass][letter] - || classStates[splittedClass].size() > classStates[newClass].size()) { - - queuedClasses[newClass][letter] = 1; - classesToProcess.push_back(ymake_pair(newClass, letter)); - } else { - queuedClasses[splittedClass][letter] = 1; - classesToProcess.push_back(ymake_pair(splittedClass, letter)); - } - } - } - } - - task.AcceptStates(); - return task.Success(); - } - } -} - -#endif +#ifndef PIRE_MINIMIZE_H +#define PIRE_MINIMIZE_H + +#include "stub/stl.h" +#include "partition.h" + +namespace Pire { + namespace Impl { + + /** + * An interface of a minimization task. + * You don't have to derive from this class; it is just a start point template. + */ + class MinimizeTask { + private: + struct ImplementationSpecific1; + + public: + // States must be represented by size_t. + + /// States must be initially divided into some equivalence classes. + /// If states are in the same equivalence class, they may be merged without loosing state specific info. + /// Equivalence classes must have indexes from 0 to (Classes - 1). + /// The algorithm will modify equivalent classes and in the end + /// all states in the same equivalent class can be merged into one state + TVector<size_t>& GetStateClass() { return StateClass; } + + /// Returns number of equivalent classes + size_t& GetClassesNumber() { return Classes; } + + /// Should return number of letter classes + size_t LettersCount() const; + + /// Should return true if FSM is determined. + bool IsDetermined() const; + + /// Should return number of states. + size_t Size() const; + + /// Should calculate vector of previous states by, given the current state and incoming letter class index. + const TVector<size_t>& Previous(size_t state, size_t letter) const; + + /// Called when states equivalent classes are formed, and written in StateClass. + void AcceptStates(); + + typedef bool Result; + + Result Success() { return true; } + + Result Failure() { return false; } + + private: + TVector<size_t> StateClass; + + size_t Classes; + }; + + // Minimizes Determined FSM using Hopcroft algorithm, works in O(Size * log(Size) * MaxChar) time, + // requires O(Size * MaxChar * sizof(size_t)) memory. + template<class Task> + typename Task::Result Minimize(Task& task) + { + // Minimization algorithm is only applicable to a determined FSM. + if (!task.IsDetermined()) { + return task.Failure(); + } + + typedef ypair<size_t, size_t> ClassLetter; + + TVector<ybitset<MaxChar>> queuedClasses(task.Size()); + + TDeque<ClassLetter> classesToProcess; + + TVector<TVector<size_t>> classStates(task.Size()); + + TVector<size_t>& stateClass = task.GetStateClass(); + + for (size_t state = 0; state < task.Size(); ++state) { + classStates[stateClass[state]].push_back(state); + } + + for (size_t classIndex = 0; classIndex < task.GetClassesNumber(); ++classIndex) { + for (size_t letter = 0; letter < task.LettersCount(); ++letter) { + classesToProcess.push_back(ymake_pair(classIndex, letter)); + queuedClasses[classIndex][letter] = 1; + } + } + + TVector<size_t> classChange(task.Size()); + TVector<TVector<size_t>> removedStates(task.Size()); + + while (classesToProcess.size()) { + const auto currentClass = classesToProcess.front().first; + const auto currentLetter = classesToProcess.front().second; + classesToProcess.pop_front(); + queuedClasses[currentClass][currentLetter] = 0; + TVector<size_t> splittedClasses; + + for (const auto& classState : classStates[currentClass]) { + for (const auto& state: task.Previous(classState, currentLetter)) { + if (classChange[stateClass[state]] != task.GetClassesNumber()) { + classChange[stateClass[state]] = task.GetClassesNumber(); + splittedClasses.push_back(stateClass[state]); + } + removedStates[stateClass[state]].push_back(state); + } + } + + + for (const auto& splittedClass : splittedClasses) { + if (removedStates[splittedClass].size() == classStates[splittedClass].size()) { + classChange[splittedClass] = 0; + removedStates[splittedClass].clear(); + continue; + } + + const auto newClass = task.GetClassesNumber()++; + classChange[splittedClass] = newClass; + std::swap(classStates[newClass], removedStates[splittedClass]); + for (const auto& state : classStates[newClass]) { + stateClass[state] = newClass; + } + + auto iter = classStates[splittedClass].begin(); + for (const auto state : classStates[splittedClass]) { + if (stateClass[state] == splittedClass) { + *iter = state; + ++iter; + } + } + classStates[splittedClass].erase(iter, classStates[splittedClass].end()); + + for (size_t letter = 0; letter < task.LettersCount(); ++letter) { + if (queuedClasses[splittedClass][letter] + || classStates[splittedClass].size() > classStates[newClass].size()) { + + queuedClasses[newClass][letter] = 1; + classesToProcess.push_back(ymake_pair(newClass, letter)); + } else { + queuedClasses[splittedClass][letter] = 1; + classesToProcess.push_back(ymake_pair(splittedClass, letter)); + } + } + } + } + + task.AcceptStates(); + return task.Success(); + } + } +} + +#endif |