diff options
author | jakovenko-dm <jakovenko-dm@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
commit | 7077baee21e33a3ad2e790527b1c50b22c244db3 (patch) | |
tree | e719eb81a7dbb542f49340ad8c36c65d58ac42f6 | |
parent | 4282ec504ababea092138c3af45d5399d01c194a (diff) | |
download | ydb-7077baee21e33a3ad2e790527b1c50b22c244db3.tar.gz |
Restoring authorship annotation for <jakovenko-dm@yandex-team.ru>. Commit 1 of 2.
-rw-r--r-- | contrib/libs/pire/pire/extra/count.cpp | 302 | ||||
-rw-r--r-- | contrib/libs/pire/pire/extra/count.h | 472 | ||||
-rw-r--r-- | contrib/libs/pire/pire/fsm.cpp | 2 | ||||
-rw-r--r-- | contrib/libs/pire/pire/run.h | 88 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanner_io.cpp | 30 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/common.h | 14 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/half_final.h | 8 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/loaded.h | 34 | ||||
-rw-r--r-- | contrib/libs/pire/pire/scanners/multi.h | 24 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 352 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 200 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 224 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 4 |
13 files changed, 877 insertions, 877 deletions
diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp index 468ff61d92..27de4e3106 100644 --- a/contrib/libs/pire/pire/extra/count.cpp +++ b/contrib/libs/pire/pire/extra/count.cpp @@ -837,9 +837,9 @@ CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep) BuildScanner(sq, *this); } -namespace Impl { -template <class AdvancedScanner> -AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) { +namespace Impl { +template <class AdvancedScanner> +AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) { Impl::CountingFsm countingFsm{re, sep}; if (!countingFsm.Determine()) { throw Error("regexp pattern too complicated"); @@ -852,31 +852,31 @@ AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* const auto& determined = countingFsm.Determined(); const auto& letters = countingFsm.Letters(); - AdvancedScanner scanner; - scanner.Init(determined.Size(), letters, determined.Initial(), 1); + AdvancedScanner scanner; + scanner.Init(determined.Size(), letters, determined.Initial(), 1); for (size_t from = 0; from != determined.Size(); ++from) { for (auto&& lettersEl : letters) { const auto letter = lettersEl.first; const auto& tos = determined.Destinations(from, letter); Y_ASSERT(tos.size() == 1); - scanner.SetJump(from, letter, *tos.begin(), scanner.RemapAction(countingFsm.Output(from, letter))); + scanner.SetJump(from, letter, *tos.begin(), scanner.RemapAction(countingFsm.Output(from, letter))); } } - return scanner; -} -} // namespace Impl - -AdvancedCountingScanner::AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) - : AdvancedCountingScanner(Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(re, sep, simple)) -{ + return scanner; } - -NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) - : NoGlueLimitCountingScanner(Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(re, sep, simple)) -{ -} - - +} // namespace Impl + +AdvancedCountingScanner::AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) + : AdvancedCountingScanner(Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(re, sep, simple)) +{ +} + +NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) + : NoGlueLimitCountingScanner(Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(re, sep, simple)) +{ +} + + namespace Impl { template<class Scanner> @@ -908,7 +908,7 @@ public: Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount())); } -protected: +protected: TVector<State> States; TAction Action(const Scanner& sc, InternalState state, Char letter) const { @@ -919,74 +919,74 @@ protected: } }; -class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> { -public: - using ActionIndex = NoGlueLimitCountingScanner::ActionIndex; - struct TGlueAction { - TVector<ActionIndex> resets; - TVector<ActionIndex> increments; - bool operator<(const TGlueAction& rhs) const { - return std::tie(increments, resets) < std::tie(rhs.increments, rhs.resets); - } - }; - using TGlueMap = TMap<TGlueAction, ActionIndex>; - - NoGlueLimitCountingScannerGlueTask(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs) - : CountingScannerGlueTask(lhs, rhs) - { - } - - void Connect(size_t from, size_t to, Char letter) - { - TGlueAction glue_action; - this->Lhs().GetActions(Action(this->Lhs(), States[from].first, letter), 0, - std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments)); - this->Rhs().GetActions(Action(this->Rhs(), States[from].second, letter), this->Lhs().RegexpsCount(), - std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments)); - Y_ASSERT( - std::is_sorted(glue_action.increments.begin(), glue_action.increments.end()) && - std::is_sorted(glue_action.resets.begin(), glue_action.resets.end()) - ); - - if (glue_action.increments.empty() && glue_action.resets.empty()) { - this->Sc().SetJump(from, letter, to, 0); - return; - } - - auto action_iter = glue_map_.find(glue_action); - if (action_iter == glue_map_.end()) { - glue_map_[glue_action] = glue_actions_.size(); - for (const auto& ids : {glue_action.resets, glue_action.increments}) { - glue_actions_.push_back(ids.size()); - std::copy(ids.begin(), ids.end(), std::back_inserter(glue_actions_)); - } - } - - this->Sc().SetJump(from, letter, to, glue_map_[glue_action]); - } - - // Return type is same as in parent class - // TODO: Maybe return by value to use move semantic? - const NoGlueLimitCountingScanner& Success() - { - glue_actions_[0] = glue_actions_.size(); - Sc().AcceptActions(glue_actions_); - return Sc(); - } - -private: - TGlueMap glue_map_; - TVector<ActionIndex> glue_actions_ = {1}; -}; - - +class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> { +public: + using ActionIndex = NoGlueLimitCountingScanner::ActionIndex; + struct TGlueAction { + TVector<ActionIndex> resets; + TVector<ActionIndex> increments; + bool operator<(const TGlueAction& rhs) const { + return std::tie(increments, resets) < std::tie(rhs.increments, rhs.resets); + } + }; + using TGlueMap = TMap<TGlueAction, ActionIndex>; + + NoGlueLimitCountingScannerGlueTask(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs) + : CountingScannerGlueTask(lhs, rhs) + { + } + + void Connect(size_t from, size_t to, Char letter) + { + TGlueAction glue_action; + this->Lhs().GetActions(Action(this->Lhs(), States[from].first, letter), 0, + std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments)); + this->Rhs().GetActions(Action(this->Rhs(), States[from].second, letter), this->Lhs().RegexpsCount(), + std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments)); + Y_ASSERT( + std::is_sorted(glue_action.increments.begin(), glue_action.increments.end()) && + std::is_sorted(glue_action.resets.begin(), glue_action.resets.end()) + ); + + if (glue_action.increments.empty() && glue_action.resets.empty()) { + this->Sc().SetJump(from, letter, to, 0); + return; + } + + auto action_iter = glue_map_.find(glue_action); + if (action_iter == glue_map_.end()) { + glue_map_[glue_action] = glue_actions_.size(); + for (const auto& ids : {glue_action.resets, glue_action.increments}) { + glue_actions_.push_back(ids.size()); + std::copy(ids.begin(), ids.end(), std::back_inserter(glue_actions_)); + } + } + + this->Sc().SetJump(from, letter, to, glue_map_[glue_action]); + } + + // Return type is same as in parent class + // TODO: Maybe return by value to use move semantic? + const NoGlueLimitCountingScanner& Success() + { + glue_actions_[0] = glue_actions_.size(); + Sc().AcceptActions(glue_actions_); + return Sc(); + } + +private: + TGlueMap glue_map_; + TVector<ActionIndex> glue_actions_ = {1}; +}; + + } CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */) { - if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { - return CountingScanner(); - } + if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { + return CountingScanner(); + } static constexpr size_t DefMaxSize = 250000; Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); @@ -994,75 +994,75 @@ CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const Counting AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */) { - if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { - return AdvancedCountingScanner(); - } + if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) { + return AdvancedCountingScanner(); + } static constexpr size_t DefMaxSize = 250000; Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs); return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); } -NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */) -{ - static constexpr size_t DefMaxSize = 250000; - Impl::NoGlueLimitCountingScannerGlueTask task(lhs, rhs); - return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); -} - -// Should Save(), Load() and Mmap() functions return stream/pointer in aligned state? -// Now they don't because tests don't require it. -void NoGlueLimitCountingScanner::Save(yostream* s) const { - Y_ASSERT(!AdvancedScannerCompatibilityMode); - LoadedScanner::Save(s, ScannerIOTypes::NoGlueLimitCountingScanner); - if (Actions) { - SavePodArray(s, Actions, *Actions); - } else { - const ActionIndex zeroSize = 0; - SavePodType(s, zeroSize); - } -} - -void NoGlueLimitCountingScanner::Load(yistream* s) { - ui32 type; - LoadedScanner::Load(s, &type); - ActionIndex actionsSize; - if (type == ScannerIOTypes::NoGlueLimitCountingScanner) { - LoadPodType(s, actionsSize); - - if (actionsSize == 0) { - ActionsBuffer.reset(); - Actions = nullptr; - } else { - ActionsBuffer = TActionsBuffer(new ActionIndex[actionsSize]); - ActionsBuffer[0] = actionsSize; - LoadPodArray(s, &ActionsBuffer[1], actionsSize - 1); - Actions = ActionsBuffer.get(); - } - } else { - Y_ASSERT(type == ScannerIOTypes::LoadedScanner); - AdvancedScannerCompatibilityMode = true; - } -} - -const void* NoGlueLimitCountingScanner::Mmap(const void* ptr, size_t size) { - NoGlueLimitCountingScanner scanner; - ui32 type; - auto p = static_cast<const size_t*> (scanner.LoadedScanner::Mmap(ptr, size, &type)); - - if (type == ScannerIOTypes::NoGlueLimitCountingScanner) { - scanner.Actions = reinterpret_cast<const ActionIndex*>(p); - if (*scanner.Actions == 0) { - scanner.Actions = nullptr; - Impl::AdvancePtr(p, size, sizeof(ActionIndex)); - } else { - Impl::AdvancePtr(p, size, *scanner.Actions * sizeof(ActionIndex)); - } - } else { - Y_ASSERT(type == ScannerIOTypes::LoadedScanner); - scanner.AdvancedScannerCompatibilityMode = true; - } - Swap(scanner); - return static_cast<const void*>(p); -} - +NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */) +{ + static constexpr size_t DefMaxSize = 250000; + Impl::NoGlueLimitCountingScannerGlueTask task(lhs, rhs); + return Impl::Determine(task, maxSize ? maxSize : DefMaxSize); } + +// Should Save(), Load() and Mmap() functions return stream/pointer in aligned state? +// Now they don't because tests don't require it. +void NoGlueLimitCountingScanner::Save(yostream* s) const { + Y_ASSERT(!AdvancedScannerCompatibilityMode); + LoadedScanner::Save(s, ScannerIOTypes::NoGlueLimitCountingScanner); + if (Actions) { + SavePodArray(s, Actions, *Actions); + } else { + const ActionIndex zeroSize = 0; + SavePodType(s, zeroSize); + } +} + +void NoGlueLimitCountingScanner::Load(yistream* s) { + ui32 type; + LoadedScanner::Load(s, &type); + ActionIndex actionsSize; + if (type == ScannerIOTypes::NoGlueLimitCountingScanner) { + LoadPodType(s, actionsSize); + + if (actionsSize == 0) { + ActionsBuffer.reset(); + Actions = nullptr; + } else { + ActionsBuffer = TActionsBuffer(new ActionIndex[actionsSize]); + ActionsBuffer[0] = actionsSize; + LoadPodArray(s, &ActionsBuffer[1], actionsSize - 1); + Actions = ActionsBuffer.get(); + } + } else { + Y_ASSERT(type == ScannerIOTypes::LoadedScanner); + AdvancedScannerCompatibilityMode = true; + } +} + +const void* NoGlueLimitCountingScanner::Mmap(const void* ptr, size_t size) { + NoGlueLimitCountingScanner scanner; + ui32 type; + auto p = static_cast<const size_t*> (scanner.LoadedScanner::Mmap(ptr, size, &type)); + + if (type == ScannerIOTypes::NoGlueLimitCountingScanner) { + scanner.Actions = reinterpret_cast<const ActionIndex*>(p); + if (*scanner.Actions == 0) { + scanner.Actions = nullptr; + Impl::AdvancePtr(p, size, sizeof(ActionIndex)); + } else { + Impl::AdvancePtr(p, size, *scanner.Actions * sizeof(ActionIndex)); + } + } else { + Y_ASSERT(type == ScannerIOTypes::LoadedScanner); + scanner.AdvancedScannerCompatibilityMode = true; + } + Swap(scanner); + return static_cast<const void*>(p); +} + +} diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h index bd1526b98d..7a67f64e28 100644 --- a/contrib/libs/pire/pire/extra/count.h +++ b/contrib/libs/pire/pire/extra/count.h @@ -27,8 +27,8 @@ #include <contrib/libs/pire/pire/scanners/loaded.h> #include <contrib/libs/pire/pire/fsm.h> -#include <algorithm> - +#include <algorithm> + namespace Pire { class Fsm; @@ -38,11 +38,11 @@ namespace Impl { template<class T> class CountingScannerGlueTask; - - class NoGlueLimitCountingScannerGlueTask; - - template <class AdvancedScanner> - AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple); + + class NoGlueLimitCountingScannerGlueTask; + + template <class AdvancedScanner> + AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple); }; template<size_t I> @@ -115,7 +115,7 @@ public: * given regexp separated by another regexp * in input text. */ -template<class DerivedScanner, class State> +template<class DerivedScanner, class State> class BaseCountingScanner: public LoadedScanner { public: enum { @@ -137,7 +137,7 @@ public: PIRE_FORCED_INLINE PIRE_HOT_FUNCTION void TakeAction(State& s, Action a) const { - static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a); + static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a); } bool CanStop(const State&) const { return false; } @@ -203,41 +203,41 @@ protected: } }; -template <size_t MAX_RE_COUNT> -class CountingState { -public: - size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } -private: - using InternalState = LoadedScanner::InternalState; - InternalState m_state; - ui32 m_current[MAX_RE_COUNT]; - ui32 m_total[MAX_RE_COUNT]; - size_t m_updatedMask; - - template <class DerivedScanner, class State> - friend class BaseCountingScanner; - - template<size_t I> - friend class IncrementPerformer; - - template<size_t I> - friend class ResetPerformer; - -#ifdef PIRE_DEBUG - friend yostream& operator << (yostream& s, const State& state) - { - s << state.m_state << " ( "; - for (size_t i = 0; i < MAX_RE_COUNT; ++i) - s << state.m_current[i] << '/' << state.m_total[i] << ' '; - return s << ')'; - } -#endif -}; - - -class CountingScanner : public BaseCountingScanner<CountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { +template <size_t MAX_RE_COUNT> +class CountingState { public: - using State = CountingState<MAX_RE_COUNT>; + size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } +private: + using InternalState = LoadedScanner::InternalState; + InternalState m_state; + ui32 m_current[MAX_RE_COUNT]; + ui32 m_total[MAX_RE_COUNT]; + size_t m_updatedMask; + + template <class DerivedScanner, class State> + friend class BaseCountingScanner; + + template<size_t I> + friend class IncrementPerformer; + + template<size_t I> + friend class ResetPerformer; + +#ifdef PIRE_DEBUG + friend yostream& operator << (yostream& s, const State& state) + { + s << state.m_state << " ( "; + for (size_t i = 0; i < MAX_RE_COUNT; ++i) + s << state.m_current[i] << '/' << state.m_total[i] << ' '; + return s << ')'; + } +#endif +}; + + +class CountingScanner : public BaseCountingScanner<CountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { +public: + using State = CountingState<MAX_RE_COUNT>; enum { Matched = 2, }; @@ -273,10 +273,10 @@ private: friend class Impl::CountingScannerGlueTask<CountingScanner>; }; -class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { +class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> { public: - using State = CountingState<MAX_RE_COUNT>; - + using State = CountingState<MAX_RE_COUNT>; + AdvancedCountingScanner() {} AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr); @@ -309,196 +309,196 @@ private: friend class Impl::ScannerGlueCommon<AdvancedCountingScanner>; friend class Impl::CountingScannerGlueTask<AdvancedCountingScanner>; - friend AdvancedCountingScanner Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(const Fsm&, const Fsm&, bool*); -}; - -class NoGlueLimitCountingState { -public: - size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } - void Initialize(size_t initial, size_t regexpsCount) { - m_state = initial; - m_current.assign(regexpsCount, 0); - m_total.assign(regexpsCount, 0); - } - PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - void Reset(size_t regexpId) { - m_current[regexpId] = 0; - } - PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - void Increment(size_t regexp_id) { - ++m_current[regexp_id]; - m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]); - } - - template<size_t I> - friend class IncrementPerformer; - - template<size_t I> - friend class ResetPerformer; - -private: - LoadedScanner::InternalState m_state; - TVector<ui32> m_current; - TVector<ui32> m_total; - - template <class DerivedScanner, class State> - friend class BaseCountingScanner; - -#ifdef PIRE_DEBUG - yostream& operator << (yostream& s, const State& state) - { - s << state.m_state << " ( "; - for (size_t i = 0; i < state.m_current.size(); ++i) - s << state.m_current[i] << '/' << state.m_total[i] << ' '; - return s << ')'; - } -#endif + friend AdvancedCountingScanner Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(const Fsm&, const Fsm&, bool*); }; - -class NoGlueLimitCountingScanner : public BaseCountingScanner<NoGlueLimitCountingScanner, NoGlueLimitCountingState> { -public: - using State = NoGlueLimitCountingState; - using ActionIndex = ui32; - using TActionsBuffer = std::unique_ptr<ActionIndex[]>; - -private: - TActionsBuffer ActionsBuffer; - const ActionIndex* Actions = nullptr; - bool AdvancedScannerCompatibilityMode = false; - -public: - NoGlueLimitCountingScanner() = default; - NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr); - NoGlueLimitCountingScanner(const NoGlueLimitCountingScanner& rhs) - : BaseCountingScanner(rhs) - , AdvancedScannerCompatibilityMode(rhs.AdvancedScannerCompatibilityMode) - { - if (rhs.ActionsBuffer) { - Y_ASSERT(rhs.Actions); - ActionsBuffer = TActionsBuffer(new ActionIndex [*rhs.Actions]); - std::copy_n(rhs.ActionsBuffer.get(), *rhs.Actions, ActionsBuffer.get()); - Actions = ActionsBuffer.get(); - } else { - Actions = rhs.Actions; - } - } - - NoGlueLimitCountingScanner(NoGlueLimitCountingScanner&& other) : BaseCountingScanner() { - Swap(other); - } - - NoGlueLimitCountingScanner& operator=(NoGlueLimitCountingScanner rhs) { - Swap(rhs); - return *this; - } - - void Swap(NoGlueLimitCountingScanner& s) { - LoadedScanner::Swap(s); - DoSwap(ActionsBuffer, s.ActionsBuffer); - DoSwap(Actions, s.Actions); - DoSwap(AdvancedScannerCompatibilityMode, s.AdvancedScannerCompatibilityMode); - } - - void Initialize(State& state) const - { - state.Initialize(m.initial, RegexpsCount()); - } - - template <size_t ActualReCount> - PIRE_FORCED_INLINE PIRE_HOT_FUNCTION - void TakeActionImpl(State& s, Action a) const - { - if (!a) { - return; - } - if (AdvancedScannerCompatibilityMode) { - AdvancedScannerTakeActionImpl<ActualReCount>(s, a); - return; - } - // Note: it's important to perform resets before increments, - // as it's possible for one repetition group to stop and another begin at the same symbol - if (Actions) { - auto action = Actions + a; - for (auto reset_count = *action++; reset_count--;) { - s.Reset(*action++); - } - for (auto inc_count = *action++; inc_count--;) { - s.Increment(*action++); - } - } else { - Y_ASSERT(RegexpsCount() == 1); - if (a & ResetAction) { - s.Reset(0); - } - if (a & IncrementAction) { - s.Increment(0); - } - } - } - - void Save(yostream* s) const; - - void Load(yistream* s); - - const void* Mmap(const void* ptr, size_t size); - - static NoGlueLimitCountingScanner Glue(const NoGlueLimitCountingScanner& a, const NoGlueLimitCountingScanner& b, size_t maxSize = 0); - -private: - Action RemapAction(Action action) - { - return action; - } - - template <class Iterator> - void GetActions(Action a, ActionIndex id_shift, Iterator output_resets, Iterator output_increments) const { - if (!a) { - return; - } - if (!Actions) { - if (a & ResetAction) { - *output_resets++ = id_shift; - } - if (a & NoGlueLimitCountingScanner::IncrementAction) { - *output_increments++ = id_shift; - } - return; - } - auto action = Actions + a; - for (auto output : {output_resets, output_increments}) { - for (auto count = *action++; count--;) { - *output++ = *action++ + id_shift; - } - } - } - - void AcceptActions(const TVector<ActionIndex>& actions) { - Y_ASSERT(!Actions); - Y_ASSERT(!actions.empty()); - Y_ASSERT(actions[0] == actions.size()); - - ActionsBuffer = TActionsBuffer(new ActionIndex[actions.size()]); - std::copy(actions.begin(), actions.end(), ActionsBuffer.get()); - Actions = ActionsBuffer.get(); - } - - template <size_t ActualReCount> - void AdvancedScannerTakeActionImpl(State& s, Action a) const { - if (a & ResetMask) { - ResetPerformer<ActualReCount>::Do(s, a); - } - if (a & IncrementMask) { - IncrementPerformer<ActualReCount>::Do(s, a); - } - } - - friend class Impl::ScannerGlueCommon<NoGlueLimitCountingScanner>; - friend class Impl::CountingScannerGlueTask<NoGlueLimitCountingScanner>; - friend class Impl::NoGlueLimitCountingScannerGlueTask; - friend NoGlueLimitCountingScanner Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(const Fsm&, const Fsm&, bool*); -}; - -} - +class NoGlueLimitCountingState { +public: + size_t Result(int i) const { return ymax(m_current[i], m_total[i]); } + void Initialize(size_t initial, size_t regexpsCount) { + m_state = initial; + m_current.assign(regexpsCount, 0); + m_total.assign(regexpsCount, 0); + } + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION + void Reset(size_t regexpId) { + m_current[regexpId] = 0; + } + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION + void Increment(size_t regexp_id) { + ++m_current[regexp_id]; + m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]); + } + + template<size_t I> + friend class IncrementPerformer; + + template<size_t I> + friend class ResetPerformer; + +private: + LoadedScanner::InternalState m_state; + TVector<ui32> m_current; + TVector<ui32> m_total; + + template <class DerivedScanner, class State> + friend class BaseCountingScanner; + +#ifdef PIRE_DEBUG + yostream& operator << (yostream& s, const State& state) + { + s << state.m_state << " ( "; + for (size_t i = 0; i < state.m_current.size(); ++i) + s << state.m_current[i] << '/' << state.m_total[i] << ' '; + return s << ')'; + } #endif +}; + + +class NoGlueLimitCountingScanner : public BaseCountingScanner<NoGlueLimitCountingScanner, NoGlueLimitCountingState> { +public: + using State = NoGlueLimitCountingState; + using ActionIndex = ui32; + using TActionsBuffer = std::unique_ptr<ActionIndex[]>; + +private: + TActionsBuffer ActionsBuffer; + const ActionIndex* Actions = nullptr; + bool AdvancedScannerCompatibilityMode = false; + +public: + NoGlueLimitCountingScanner() = default; + NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr); + NoGlueLimitCountingScanner(const NoGlueLimitCountingScanner& rhs) + : BaseCountingScanner(rhs) + , AdvancedScannerCompatibilityMode(rhs.AdvancedScannerCompatibilityMode) + { + if (rhs.ActionsBuffer) { + Y_ASSERT(rhs.Actions); + ActionsBuffer = TActionsBuffer(new ActionIndex [*rhs.Actions]); + std::copy_n(rhs.ActionsBuffer.get(), *rhs.Actions, ActionsBuffer.get()); + Actions = ActionsBuffer.get(); + } else { + Actions = rhs.Actions; + } + } + + NoGlueLimitCountingScanner(NoGlueLimitCountingScanner&& other) : BaseCountingScanner() { + Swap(other); + } + + NoGlueLimitCountingScanner& operator=(NoGlueLimitCountingScanner rhs) { + Swap(rhs); + return *this; + } + + void Swap(NoGlueLimitCountingScanner& s) { + LoadedScanner::Swap(s); + DoSwap(ActionsBuffer, s.ActionsBuffer); + DoSwap(Actions, s.Actions); + DoSwap(AdvancedScannerCompatibilityMode, s.AdvancedScannerCompatibilityMode); + } + + void Initialize(State& state) const + { + state.Initialize(m.initial, RegexpsCount()); + } + + template <size_t ActualReCount> + PIRE_FORCED_INLINE PIRE_HOT_FUNCTION + void TakeActionImpl(State& s, Action a) const + { + if (!a) { + return; + } + if (AdvancedScannerCompatibilityMode) { + AdvancedScannerTakeActionImpl<ActualReCount>(s, a); + return; + } + // Note: it's important to perform resets before increments, + // as it's possible for one repetition group to stop and another begin at the same symbol + if (Actions) { + auto action = Actions + a; + for (auto reset_count = *action++; reset_count--;) { + s.Reset(*action++); + } + for (auto inc_count = *action++; inc_count--;) { + s.Increment(*action++); + } + } else { + Y_ASSERT(RegexpsCount() == 1); + if (a & ResetAction) { + s.Reset(0); + } + if (a & IncrementAction) { + s.Increment(0); + } + } + } + + void Save(yostream* s) const; + + void Load(yistream* s); + + const void* Mmap(const void* ptr, size_t size); + + static NoGlueLimitCountingScanner Glue(const NoGlueLimitCountingScanner& a, const NoGlueLimitCountingScanner& b, size_t maxSize = 0); + +private: + Action RemapAction(Action action) + { + return action; + } + + template <class Iterator> + void GetActions(Action a, ActionIndex id_shift, Iterator output_resets, Iterator output_increments) const { + if (!a) { + return; + } + if (!Actions) { + if (a & ResetAction) { + *output_resets++ = id_shift; + } + if (a & NoGlueLimitCountingScanner::IncrementAction) { + *output_increments++ = id_shift; + } + return; + } + auto action = Actions + a; + for (auto output : {output_resets, output_increments}) { + for (auto count = *action++; count--;) { + *output++ = *action++ + id_shift; + } + } + } + + void AcceptActions(const TVector<ActionIndex>& actions) { + Y_ASSERT(!Actions); + Y_ASSERT(!actions.empty()); + Y_ASSERT(actions[0] == actions.size()); + + ActionsBuffer = TActionsBuffer(new ActionIndex[actions.size()]); + std::copy(actions.begin(), actions.end(), ActionsBuffer.get()); + Actions = ActionsBuffer.get(); + } + + template <size_t ActualReCount> + void AdvancedScannerTakeActionImpl(State& s, Action a) const { + if (a & ResetMask) { + ResetPerformer<ActualReCount>::Do(s, a); + } + if (a & IncrementMask) { + IncrementPerformer<ActualReCount>::Do(s, a); + } + } + + friend class Impl::ScannerGlueCommon<NoGlueLimitCountingScanner>; + friend class Impl::CountingScannerGlueTask<NoGlueLimitCountingScanner>; + friend class Impl::NoGlueLimitCountingScannerGlueTask; + friend NoGlueLimitCountingScanner Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(const Fsm&, const Fsm&, bool*); +}; + +} + +#endif diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp index 984d708dfa..27bfd91522 100644 --- a/contrib/libs/pire/pire/fsm.cpp +++ b/contrib/libs/pire/pire/fsm.cpp @@ -612,7 +612,7 @@ Fsm& Fsm::Reverse() out.Connect(j, from, i.first); // Invert initial and final states - out.m_final.clear(); + out.m_final.clear(); out.SetFinal(initial, true); for (auto i : m_final) out.Connect(Size(), i, Epsilon); diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h index f6e1ff734d..6ce6c9780e 100644 --- a/contrib/libs/pire/pire/run.h +++ b/contrib/libs/pire/pire/run.h @@ -31,7 +31,7 @@ #include "platform.h" #include "defs.h" -#include <string> +#include <string> namespace Pire { @@ -283,67 +283,67 @@ void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, cons Run(sc, st, TStringBuf(begin, end)); } -/// Returns default constructed string_view{} if there is no matching prefix -/// Returns str.substr(0, 0) if matching prefix is empty +/// Returns default constructed string_view{} if there is no matching prefix +/// Returns str.substr(0, 0) if matching prefix is empty template<class Scanner> -std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) +std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) { typename Scanner::State st; sc.Initialize(st); if (throughBeginMark) Pire::Step(sc, st, BeginMark); - const char* pos = (sc.Final(st) ? str.data() : nullptr); + const char* pos = (sc.Final(st) ? str.data() : nullptr); Impl::DoRun(sc, st, str, Impl::LongestPrefixPred<Scanner>(pos)); if (throughEndMark) { Pire::Step(sc, st, EndMark); if (sc.Final(st)) - pos = str.data() + str.size(); + pos = str.data() + str.size(); } - return pos ? str.substr(0, pos - str.data()) : std::string_view{}; + return pos ? str.substr(0, pos - str.data()) : std::string_view{}; } template<class Scanner> const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false) { - auto prefix = LongestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark); - return prefix.data() + prefix.size(); + auto prefix = LongestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark); + return prefix.data() + prefix.size(); } -/// Returns default constructed string_view{} if there is no matching prefix -/// Returns str.substr(0, 0) if matching prefix is empty +/// Returns default constructed string_view{} if there is no matching prefix +/// Returns str.substr(0, 0) if matching prefix is empty template<class Scanner> -std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) +std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false) { typename Scanner::State st; sc.Initialize(st); if (throughBeginMark) Pire::Step(sc, st, BeginMark); if (sc.Final(st)) - return str.substr(0, 0); - const char* pos = nullptr; + return str.substr(0, 0); + const char* pos = nullptr; Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos)); if (throughEndMark) { Pire::Step(sc, st, EndMark); - if (sc.Final(st) && !pos) - pos = str.data() + str.size(); + if (sc.Final(st) && !pos) + pos = str.data() + str.size(); } - return pos ? str.substr(0, pos - str.data()) : std::string_view{}; + return pos ? str.substr(0, pos - str.data()) : std::string_view{}; } template<class Scanner> const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false) { - auto prefix = ShortestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark); - return prefix.data() + prefix.size(); + auto prefix = ShortestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark); + return prefix.data() + prefix.size(); } /// The same as above, but scans string in reverse direction /// (consider using Fsm::Reverse() for using in this function). -/// Returns default constructed string_view{} if there is no matching suffix -/// Returns str.substr(str.size(), 0) if matching suffix is empty +/// Returns default constructed string_view{} if there is no matching suffix +/// Returns str.substr(str.size(), 0) if matching suffix is empty template<class Scanner> -inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) +inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) { typename Scanner::State state; scanner.Initialize(state); @@ -352,38 +352,38 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl); PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); - std::string_view suffix{}; - auto begin = str.data() + str.size(); - while (begin != str.data() && !scanner.Dead(state)) { + std::string_view suffix{}; + auto begin = str.data() + str.size(); + while (begin != str.data() && !scanner.Dead(state)) { if (scanner.Final(state)) - suffix = str.substr(begin - str.data()); - --begin; - Step(scanner, state, (unsigned char)*begin); - PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl); + suffix = str.substr(begin - str.data()); + --begin; + Step(scanner, state, (unsigned char)*begin); + PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl); } if (scanner.Final(state)) - suffix = str.substr(begin - str.data()); + suffix = str.substr(begin - str.data()); if (throughBeginMark) { Step(scanner, state, BeginMark); if (scanner.Final(state)) - suffix = str.substr(begin - str.data()); + suffix = str.substr(begin - str.data()); } - return suffix; + return suffix; } template<class Scanner> inline const char* LongestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) { - auto suffix = LongestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); - return suffix.data() ? suffix.data() - 1 : nullptr; + auto suffix = LongestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); + return suffix.data() ? suffix.data() - 1 : nullptr; } /// The same as above, but scans string in reverse direction -/// Returns default constructed string_view{} if there is no matching suffix -/// Returns str.substr(str.size(), 0) if matching suffix is empty +/// Returns default constructed string_view{} if there is no matching suffix +/// Returns str.substr(str.size(), 0) if matching suffix is empty template<class Scanner> -inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) +inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false) { - auto begin = str.data() + str.size(); + auto begin = str.data() + str.size(); typename Scanner::State state; scanner.Initialize(state); if (throughEndMark) @@ -391,20 +391,20 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl); PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl); - while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) { - --begin; - scanner.Next(state, (unsigned char)*begin); + while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) { + --begin; + scanner.Next(state, (unsigned char)*begin); PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl); } if (throughBeginMark) Step(scanner, state, BeginMark); - return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{}; + return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{}; } template<class Scanner> inline const char* ShortestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) { - auto suffix = ShortestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); - return suffix.data() ? suffix.data() - 1 : nullptr; + auto suffix = ShortestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark); + return suffix.data() ? suffix.data() - 1 : nullptr; } diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp index 3956e3c6ed..5c723ca427 100644 --- a/contrib/libs/pire/pire/scanner_io.cpp +++ b/contrib/libs/pire/pire/scanner_io.cpp @@ -169,14 +169,14 @@ void SlowScanner::Load(yistream* s) Swap(sc); } -void LoadedScanner::Save(yostream* s) const { - Save(s, ScannerIOTypes::LoadedScanner); -} - -void LoadedScanner::Save(yostream* s, ui32 type) const +void LoadedScanner::Save(yostream* s) const { + Save(s, ScannerIOTypes::LoadedScanner); +} + +void LoadedScanner::Save(yostream* s, ui32 type) const { - Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner); - SavePodType(s, Header(type, sizeof(m))); + Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner); + SavePodType(s, Header(type, sizeof(m))); Impl::AlignSave(s, sizeof(Header)); Locals mc = m; mc.initial -= reinterpret_cast<size_t>(m_jumps); @@ -188,17 +188,17 @@ void LoadedScanner::Save(yostream* s, ui32 type) const Impl::AlignedSaveArray(s, m_tags, m.statesCount); } -void LoadedScanner::Load(yistream* s) { - Load(s, nullptr); -} - -void LoadedScanner::Load(yistream* s, ui32* type) +void LoadedScanner::Load(yistream* s) { + Load(s, nullptr); +} + +void LoadedScanner::Load(yistream* s, ui32* type) { LoadedScanner sc; Header header = Impl::ValidateHeader(s, ScannerIOTypes::LoadedScanner, sizeof(sc.m)); - if (type) { - *type = header.Type; - } + if (type) { + *type = header.Type; + } LoadPodType(s, sc.m); Impl::AlignLoad(s, sizeof(sc.m)); sc.m_buffer = BufferType(new char[sc.BufSize()]); diff --git a/contrib/libs/pire/pire/scanners/common.h b/contrib/libs/pire/pire/scanners/common.h index de5ea0af7b..a92684cf3d 100644 --- a/contrib/libs/pire/pire/scanners/common.h +++ b/contrib/libs/pire/pire/scanners/common.h @@ -36,8 +36,8 @@ namespace Pire { Scanner = 1, SimpleScanner = 2, SlowScanner = 3, - LoadedScanner = 4, - NoGlueLimitCountingScanner = 5, + LoadedScanner = 4, + NoGlueLimitCountingScanner = 5, }; } @@ -68,12 +68,12 @@ namespace Pire { throw Error("Serialized regexp incompatible with your system"); if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS) throw Error("You are trying to used an incompatible version of a serialized regexp"); - if (type != ScannerIOTypes::NoScanner && type != Type && - !(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) { - throw Error("Serialized regexp incompatible with your system"); - } - if (hdrsize != 0 && HdrSize != hdrsize) + if (type != ScannerIOTypes::NoScanner && type != Type && + !(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) { throw Error("Serialized regexp incompatible with your system"); + } + if (hdrsize != 0 && HdrSize != hdrsize) + throw Error("Serialized regexp incompatible with your system"); } }; diff --git a/contrib/libs/pire/pire/scanners/half_final.h b/contrib/libs/pire/pire/scanners/half_final.h index 071c3414a2..1755114302 100644 --- a/contrib/libs/pire/pire/scanners/half_final.h +++ b/contrib/libs/pire/pire/scanners/half_final.h @@ -210,13 +210,13 @@ private: void BuildFinals(const HalfFinalFsm& fsm) { Y_ASSERT(Scanner::m_buffer); Y_ASSERT(fsm.GetFsm().Size() == Scanner::Size()); - auto finalWriter = Scanner::m_final; + auto finalWriter = Scanner::m_final; for (size_t state = 0; state < Scanner::Size(); ++state) { - Scanner::m_finalIndex[state] = finalWriter - Scanner::m_final; + Scanner::m_finalIndex[state] = finalWriter - Scanner::m_final; for (size_t i = 0; i < fsm.GetCount(state); i++) { - *finalWriter++ = 0; + *finalWriter++ = 0; } - *finalWriter++ = static_cast<size_t>(-1); + *finalWriter++ = static_cast<size_t>(-1); } } diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h index 120dc403b7..ddc6a84b5b 100644 --- a/contrib/libs/pire/pire/scanners/loaded.h +++ b/contrib/libs/pire/pire/scanners/loaded.h @@ -103,13 +103,13 @@ protected: } LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; } - LoadedScanner (LoadedScanner&& other) : LoadedScanner() { - Swap(other); - } - LoadedScanner& operator=(LoadedScanner&& other) { - Swap(other); - return *this; - } + LoadedScanner (LoadedScanner&& other) : LoadedScanner() { + Swap(other); + } + LoadedScanner& operator=(LoadedScanner&& other) { + Swap(other); + return *this; + } public: size_t Size() const { return m.statesCount; } @@ -120,19 +120,19 @@ public: size_t LettersCount() const { return m.lettersCount; } - const void* Mmap(const void* ptr, size_t size) { - return Mmap(ptr, size, nullptr); - } - - const void* Mmap(const void* ptr, size_t size, ui32* type) + const void* Mmap(const void* ptr, size_t size) { + return Mmap(ptr, size, nullptr); + } + + const void* Mmap(const void* ptr, size_t size, ui32* type) { Impl::CheckAlign(ptr); LoadedScanner s; const size_t* p = reinterpret_cast<const size_t*>(ptr); Header header = Impl::ValidateHeader(p, size, ScannerIOTypes::LoadedScanner, sizeof(s.m)); - if (type) { - *type = header.Type; - } + if (type) { + *type = header.Type; + } Locals* locals; Impl::MapPtr(locals, 1, p, size); @@ -152,9 +152,9 @@ public: return (const void*) p; } - void Save(yostream*, ui32 type) const; + void Save(yostream*, ui32 type) const; void Save(yostream*) const; - void Load(yistream*, ui32* type); + void Load(yistream*, ui32* type); void Load(yistream*); template<class Eq> diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h index 29679e416e..b993808bf7 100644 --- a/contrib/libs/pire/pire/scanners/multi.h +++ b/contrib/libs/pire/pire/scanners/multi.h @@ -24,7 +24,7 @@ #ifndef PIRE_SCANNERS_MULTI_H #define PIRE_SCANNERS_MULTI_H -#include <cstring> +#include <cstring> #include <string.h> #include <contrib/libs/pire/pire/approx_matching.h> #include <contrib/libs/pire/pire/fsm.h> @@ -348,7 +348,7 @@ protected: template<class Eq> void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1) { - std::memset(&m, 0, sizeof(m)); + std::memset(&m, 0, sizeof(m)); m.relocationSignature = Relocation::Signature; m.shortcuttingSignature = Shortcutting::Signature; m.statesCount = states; @@ -407,7 +407,7 @@ protected: m.relocationSignature = Relocation::Signature; m.shortcuttingSignature = Shortcutting::Signature; m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]); - std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t)); + std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t)); Markup(AlignUp(m_buffer.Get(), sizeof(size_t))); // Values in letter-to-leterclass table take into account row header size @@ -513,12 +513,12 @@ protected: void FinishBuild() { Y_ASSERT(m_buffer); - auto finalWriter = m_final; + auto finalWriter = m_final; for (size_t state = 0; state != Size(); ++state) { - m_finalIndex[state] = finalWriter - m_final; + m_finalIndex[state] = finalWriter - m_final; if (Header(IndexToState(state)).Common.Flags & FinalFlag) - *finalWriter++ = 0; - *finalWriter++ = static_cast<size_t>(-1); + *finalWriter++ = 0; + *finalWriter++ = static_cast<size_t>(-1); } BuildShortcuts(); } @@ -1022,12 +1022,12 @@ public: this->SetSc(THolder<Scanner>(new Scanner)); Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount()); - auto finalWriter = Sc().m_final; + auto finalWriter = Sc().m_final; for (size_t state = 0; state != states.size(); ++state) { - Sc().m_finalIndex[state] = finalWriter - Sc().m_final; - finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter); - finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter); - *finalWriter++ = static_cast<size_t>(-1); + Sc().m_finalIndex[state] = finalWriter - Sc().m_final; + finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter); + finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter); + *finalWriter++ = static_cast<size_t>(-1); Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0) | ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0)); diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index ba321f9c29..82ca3880d1 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -17,201 +17,201 @@ namespace NHyperscan { using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>; namespace NPrivate { - ERuntime DetectCurrentRuntime() { + ERuntime DetectCurrentRuntime() { if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) { - return ERuntime::AVX512; + return ERuntime::AVX512; } else if (NX86::HaveAVX() && NX86::HaveAVX2()) { - return ERuntime::AVX2; + return ERuntime::AVX2; } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) { - return ERuntime::Corei7; + return ERuntime::Corei7; } else { - return ERuntime::Core2; + return ERuntime::Core2; } } - TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) { - switch (runtime) { - default: - Y_ASSERT(false); + TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) { + switch (runtime) { + default: + Y_ASSERT(false); [[fallthrough]]; - case ERuntime::Core2: - case ERuntime::Corei7: - return 0; - case ERuntime::AVX2: - return CPU_FEATURES_AVX2; - case ERuntime::AVX512: - return CPU_FEATURES_AVX512; - } + case ERuntime::Core2: + case ERuntime::Corei7: + return 0; + case ERuntime::AVX2: + return CPU_FEATURES_AVX2; + case ERuntime::AVX512: + return CPU_FEATURES_AVX512; + } } - hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { - hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; - return platformInfo; + hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { + hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; + return platformInfo; } - + hs_platform_info_t MakeCurrentPlatformInfo() { return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime())); } - TImpl::TImpl(ERuntime runtime) { - switch (runtime) { - default: - Y_ASSERT(false); + TImpl::TImpl(ERuntime runtime) { + switch (runtime) { + default: + Y_ASSERT(false); [[fallthrough]]; - case ERuntime::Core2: - AllocScratch = core2_hs_alloc_scratch; - Scan = core2_hs_scan; - SerializeDatabase = core2_hs_serialize_database; - DeserializeDatabase = core2_hs_deserialize_database; - break; - case ERuntime::Corei7: - AllocScratch = corei7_hs_alloc_scratch; - Scan = corei7_hs_scan; - SerializeDatabase = corei7_hs_serialize_database; - DeserializeDatabase = corei7_hs_deserialize_database; - break; - case ERuntime::AVX2: - AllocScratch = avx2_hs_alloc_scratch; - Scan = avx2_hs_scan; - SerializeDatabase = avx2_hs_serialize_database; - DeserializeDatabase = avx2_hs_deserialize_database; - break; - case ERuntime::AVX512: - AllocScratch = avx512_hs_alloc_scratch; - Scan = avx512_hs_scan; - SerializeDatabase = avx512_hs_serialize_database; - DeserializeDatabase = avx512_hs_deserialize_database; - } + case ERuntime::Core2: + AllocScratch = core2_hs_alloc_scratch; + Scan = core2_hs_scan; + SerializeDatabase = core2_hs_serialize_database; + DeserializeDatabase = core2_hs_deserialize_database; + break; + case ERuntime::Corei7: + AllocScratch = corei7_hs_alloc_scratch; + Scan = corei7_hs_scan; + SerializeDatabase = corei7_hs_serialize_database; + DeserializeDatabase = corei7_hs_deserialize_database; + break; + case ERuntime::AVX2: + AllocScratch = avx2_hs_alloc_scratch; + Scan = avx2_hs_scan; + SerializeDatabase = avx2_hs_serialize_database; + DeserializeDatabase = avx2_hs_deserialize_database; + break; + case ERuntime::AVX512: + AllocScratch = avx512_hs_alloc_scratch; + Scan = avx512_hs_scan; + SerializeDatabase = avx512_hs_serialize_database; + DeserializeDatabase = avx512_hs_deserialize_database; + } } - - TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { - hs_database_t* rawDb = nullptr; - hs_compile_error_t* rawCompileErr = nullptr; - hs_error_t status = hs_compile( - regex.begin(), - flags, - HS_MODE_BLOCK, - platform, - &rawDb, - &rawCompileErr); - TDatabase db(rawDb); - NHyperscan::TCompileError compileError(rawCompileErr); - if (status != HS_SUCCESS) { + + TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { + hs_database_t* rawDb = nullptr; + hs_compile_error_t* rawCompileErr = nullptr; + hs_error_t status = hs_compile( + regex.begin(), + flags, + HS_MODE_BLOCK, + platform, + &rawDb, + &rawCompileErr); + TDatabase db(rawDb); + NHyperscan::TCompileError compileError(rawCompileErr); + if (status != HS_SUCCESS) { ythrow TCompileException() - << "Failed to compile regex: " << regex << ". " - << "Error message (hyperscan): " << compileError->message; - } - return db; - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - hs_platform_info_t* platform, - const TVector<const hs_expr_ext_t*>* extendedParameters) { - unsigned int count = regexs.size(); - if (flags.size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(flags) = " << flags.size() << "."; - } - if (ids.size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(ids) = " << ids.size() << "."; - } - if (extendedParameters && extendedParameters->size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(extendedParameters) = " << extendedParameters->size() << "."; + << "Failed to compile regex: " << regex << ". " + << "Error message (hyperscan): " << compileError->message; } - hs_database_t* rawDb = nullptr; - hs_compile_error_t* rawCompileErr = nullptr; - hs_error_t status = hs_compile_ext_multi( - regexs.data(), - flags.data(), - ids.data(), - extendedParameters ? extendedParameters->data() : nullptr, - count, - HS_MODE_BLOCK, - platform, - &rawDb, - &rawCompileErr); - TDatabase db(rawDb); - NHyperscan::TCompileError compileError(rawCompileErr); - if (status != HS_SUCCESS) { - if (compileError->expression >= 0) { - const char* regex = regexs[compileError->expression]; - ythrow TCompileException() - << "Failed to compile regex: " << regex << ". " - << "Error message (hyperscan): " << compileError->message; - } else { - ythrow TCompileException() - << "Failed to compile multiple regexs. " - << "Error message (hyperscan): " << compileError->message; - } - } - return db; - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - const TImpl& impl) { - bool result = false; - auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) { - result = true; - return 1; // stop scan - }; - Scan( - db, - scratch, - text, - callback, - impl); - return result; + return db; } - } // namespace NPrivate - - TDatabase Compile(const TStringBuf& regex, unsigned int flags) { + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + hs_platform_info_t* platform, + const TVector<const hs_expr_ext_t*>* extendedParameters) { + unsigned int count = regexs.size(); + if (flags.size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(flags) = " << flags.size() << "."; + } + if (ids.size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(ids) = " << ids.size() << "."; + } + if (extendedParameters && extendedParameters->size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(extendedParameters) = " << extendedParameters->size() << "."; + } + hs_database_t* rawDb = nullptr; + hs_compile_error_t* rawCompileErr = nullptr; + hs_error_t status = hs_compile_ext_multi( + regexs.data(), + flags.data(), + ids.data(), + extendedParameters ? extendedParameters->data() : nullptr, + count, + HS_MODE_BLOCK, + platform, + &rawDb, + &rawCompileErr); + TDatabase db(rawDb); + NHyperscan::TCompileError compileError(rawCompileErr); + if (status != HS_SUCCESS) { + if (compileError->expression >= 0) { + const char* regex = regexs[compileError->expression]; + ythrow TCompileException() + << "Failed to compile regex: " << regex << ". " + << "Error message (hyperscan): " << compileError->message; + } else { + ythrow TCompileException() + << "Failed to compile multiple regexs. " + << "Error message (hyperscan): " << compileError->message; + } + } + return db; + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + const TImpl& impl) { + bool result = false; + auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) { + result = true; + return 1; // stop scan + }; + Scan( + db, + scratch, + text, + callback, + impl); + return result; + } + } // namespace NPrivate + + TDatabase Compile(const TStringBuf& regex, unsigned int flags) { auto platformInfo = NPrivate::MakeCurrentPlatformInfo(); return NPrivate::Compile(regex, flags, &platformInfo); } - TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { - auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); - return NPrivate::Compile(regex, flags, &platformInfo); - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - const TVector<const hs_expr_ext_t*>* extendedParameters) - { + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { + auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); + return NPrivate::Compile(regex, flags, &platformInfo); + } + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + const TVector<const hs_expr_ext_t*>* extendedParameters) + { auto platformInfo = NPrivate::MakeCurrentPlatformInfo(); return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - TCPUFeatures cpuFeatures, - const TVector<const hs_expr_ext_t*>* extendedParameters) - { - auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); - return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); - } - + } + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + TCPUFeatures cpuFeatures, + const TVector<const hs_expr_ext_t*>* extendedParameters) + { + auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); + return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); + } + TScratch MakeScratch(const TDatabase& db) { hs_scratch_t* rawScratch = nullptr; - hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); + hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); NHyperscan::TScratch scratch(rawScratch); if (status != HS_SUCCESS) { ythrow yexception() << "Failed to make scratch for hyperscan database"; @@ -221,7 +221,7 @@ namespace NHyperscan { void GrowScratch(TScratch& scratch, const TDatabase& db) { hs_scratch_t* rawScratch = scratch.Get(); - hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); + hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); if (rawScratch != scratch.Get()) { Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch scratch.Reset(rawScratch); @@ -244,9 +244,9 @@ namespace NHyperscan { bool Matches( const TDatabase& db, const TScratch& scratch, - const TStringBuf& text) - { - return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); + const TStringBuf& text) + { + return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); } TString Serialize(const TDatabase& db) { @@ -271,11 +271,11 @@ namespace NHyperscan { &rawDb); TDatabase db(rawDb); if (status != HS_SUCCESS) { - if (status == HS_DB_PLATFORM_ERROR) { - ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; - } else { - ythrow yexception() << "Failed to deserialize hyperscan database"; - } + if (status == HS_DB_PLATFORM_ERROR) { + ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; + } else { + ythrow yexception() << "Failed to deserialize hyperscan database"; + } } return db; } diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 1c8f404389..ef50cca08e 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -9,14 +9,14 @@ #include <util/system/cpu_id.h> namespace NHyperscan { - using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); - constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; - constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; - - template<typename TNativeDeleter, TNativeDeleter NativeDeleter> + using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); + constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; + constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; + + template<typename TNativeDeleter, TNativeDeleter NativeDeleter> class TDeleter { public: - template<typename T> + template<typename T> static void Destroy(T* ptr) { NativeDeleter(ptr); } @@ -26,127 +26,127 @@ namespace NHyperscan { using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; - class TCompileException : public yexception { + class TCompileException : public yexception { }; - + namespace NPrivate { - enum class ERuntime { - Core2 = 0, - Corei7 = 1, - AVX2 = 2, - AVX512 = 3 - }; - - ERuntime DetectCurrentRuntime(); - - TCPUFeatures RuntimeCpuFeatures(ERuntime runtime); - - hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures); - + enum class ERuntime { + Core2 = 0, + Corei7 = 1, + AVX2 = 2, + AVX512 = 3 + }; + + ERuntime DetectCurrentRuntime(); + + TCPUFeatures RuntimeCpuFeatures(ERuntime runtime); + + hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures); + struct TImpl { - hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch); - - hs_error_t (*Scan)(const hs_database_t* db, const char* data, - unsigned length, unsigned flags, hs_scratch_t* scratch, - match_event_handler onEvent, void* userCtx); - - hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length); - - hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info); - - TImpl() : TImpl(DetectCurrentRuntime()) {} - - explicit TImpl(ERuntime runtime); + hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch); + + hs_error_t (*Scan)(const hs_database_t* db, const char* data, + unsigned length, unsigned flags, hs_scratch_t* scratch, + match_event_handler onEvent, void* userCtx); + + hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length); + + hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info); + + TImpl() : TImpl(DetectCurrentRuntime()) {} + + explicit TImpl(ERuntime runtime); }; - - TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform); - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - hs_platform_info_t* platform, - const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - - // We need to parametrize Scan and Matches functions for testing purposes - template<typename TCallback> - void Scan( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - TCallback& callback, // applied to index of matched regex - const TImpl& impl - ) { - struct TCallbackWrapper { - static int EventHandler( - unsigned int id, - unsigned long long from, - unsigned long long to, - unsigned int flags, - void* ctx) { - Y_UNUSED(flags); - TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx); - if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) { - return callback2(id, from, to); - } else { - callback2(id, from, to); - return 0; - } - } - }; - unsigned int flags = 0; // unused at present - hs_error_t status = impl.Scan( - db.Get(), - text.begin(), - text.size(), - flags, - scratch.Get(), - &TCallbackWrapper::EventHandler, - &callback); - if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) { - ythrow yexception() << "Failed to scan against text: " << text; - } - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - const TImpl& impl); + + TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform); + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + hs_platform_info_t* platform, + const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + + // We need to parametrize Scan and Matches functions for testing purposes + template<typename TCallback> + void Scan( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + TCallback& callback, // applied to index of matched regex + const TImpl& impl + ) { + struct TCallbackWrapper { + static int EventHandler( + unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void* ctx) { + Y_UNUSED(flags); + TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx); + if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) { + return callback2(id, from, to); + } else { + callback2(id, from, to); + return 0; + } + } + }; + unsigned int flags = 0; // unused at present + hs_error_t status = impl.Scan( + db.Get(), + text.begin(), + text.size(), + flags, + scratch.Get(), + &TCallbackWrapper::EventHandler, + &callback); + if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) { + ythrow yexception() << "Failed to scan against text: " << text; + } + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + const TImpl& impl); } TDatabase Compile(const TStringBuf& regex, unsigned int flags); - TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); + TDatabase CompileMulti( const TVector<const char*>& regexs, const TVector<unsigned int>& flags, const TVector<unsigned int>& ids, - TCPUFeatures cpuFeatures, const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + TCPUFeatures cpuFeatures, + const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + TScratch MakeScratch(const TDatabase& db); void GrowScratch(TScratch& scratch, const TDatabase& db); TScratch CloneScratch(const TScratch& scratch); - template<typename TCallback> + template<typename TCallback> void Scan( const TDatabase& db, const TScratch& scratch, const TStringBuf& text, TCallback& callback // applied to index of matched regex ) { - NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); + NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); } bool Matches( diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..7abbaa4b08 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -4,12 +4,12 @@ #include <util/generic/set.h> -#include <array> -#include <algorithm> - +#include <array> +#include <algorithm> + Y_UNIT_TEST_SUITE(HyperscanWrappers) { using namespace NHyperscan; - using namespace NHyperscan::NPrivate; + using namespace NHyperscan::NPrivate; Y_UNIT_TEST(CompileAndScan) { TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); @@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { scratch1.Reset(); UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); } - - class TSimpleSingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - } - }; - - // This regex uses AVX2 instructions on long (>70) texts. - // It crushes when compiled for machine with AVX2 and run on machine without it. - class TAvx2SingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" - "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; - unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; - return NHyperscan::Compile(regex, flags, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу.bar" - "_________________________________________________________________", - impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу" - "_________________________________________________________________", - impl)); - } - }; - - class TSimpleMultiRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::CompileMulti( - { - "foo", - "bar", - }, - { - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, - }, - { - 42, - 241, - }, - cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - - TSet<unsigned int> foundIds; - auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { - foundIds.insert(id); - }; - NHyperscan::NPrivate::Scan( - db, - scratch, - "fooBaR", - callback, - impl); - UNIT_ASSERT_EQUAL(foundIds.size(), 2); - UNIT_ASSERT(foundIds.contains(42)); - UNIT_ASSERT(foundIds.contains(241)); - } - }; - - template <class Regex> - void TestCrossPlatformCompile() { - const std::array<ERuntime, 4> runtimes = { - ERuntime::Core2, - ERuntime::Corei7, - ERuntime::AVX2, - ERuntime::AVX512 - }; - - // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. - auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); - Y_ASSERT(currentRuntimeIter != runtimes.cend()); - - for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { - auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); - Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); - } - } - - Y_UNIT_TEST(CrossPlatformCompile) { - TestCrossPlatformCompile<TSimpleSingleRegex>(); - TestCrossPlatformCompile<TAvx2SingleRegex>(); - TestCrossPlatformCompile<TSimpleMultiRegex>(); - } + + class TSimpleSingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + } + }; + + // This regex uses AVX2 instructions on long (>70) texts. + // It crushes when compiled for machine with AVX2 and run on machine without it. + class TAvx2SingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" + "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; + unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; + return NHyperscan::Compile(regex, flags, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу.bar" + "_________________________________________________________________", + impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу" + "_________________________________________________________________", + impl)); + } + }; + + class TSimpleMultiRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::CompileMulti( + { + "foo", + "bar", + }, + { + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, + }, + { + 42, + 241, + }, + cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + + TSet<unsigned int> foundIds; + auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { + foundIds.insert(id); + }; + NHyperscan::NPrivate::Scan( + db, + scratch, + "fooBaR", + callback, + impl); + UNIT_ASSERT_EQUAL(foundIds.size(), 2); + UNIT_ASSERT(foundIds.contains(42)); + UNIT_ASSERT(foundIds.contains(241)); + } + }; + + template <class Regex> + void TestCrossPlatformCompile() { + const std::array<ERuntime, 4> runtimes = { + ERuntime::Core2, + ERuntime::Corei7, + ERuntime::AVX2, + ERuntime::AVX512 + }; + + // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. + auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); + Y_ASSERT(currentRuntimeIter != runtimes.cend()); + + for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { + auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); + Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); + } + } + + Y_UNIT_TEST(CrossPlatformCompile) { + TestCrossPlatformCompile<TSimpleSingleRegex>(); + TestCrossPlatformCompile<TAvx2SingleRegex>(); + TestCrossPlatformCompile<TSimpleMultiRegex>(); + } } diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index e99130ae18..e58d93502c 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -15,5 +15,5 @@ SRCS( ) END() - -RECURSE_FOR_TESTS(ut) + +RECURSE_FOR_TESTS(ut) |