aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjakovenko-dm <jakovenko-dm@yandex-team.ru>2022-02-10 16:48:06 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:48:06 +0300
commit7077baee21e33a3ad2e790527b1c50b22c244db3 (patch)
treee719eb81a7dbb542f49340ad8c36c65d58ac42f6
parent4282ec504ababea092138c3af45d5399d01c194a (diff)
downloadydb-7077baee21e33a3ad2e790527b1c50b22c244db3.tar.gz
Restoring authorship annotation for <jakovenko-dm@yandex-team.ru>. Commit 1 of 2.
-rw-r--r--contrib/libs/pire/pire/extra/count.cpp302
-rw-r--r--contrib/libs/pire/pire/extra/count.h472
-rw-r--r--contrib/libs/pire/pire/fsm.cpp2
-rw-r--r--contrib/libs/pire/pire/run.h88
-rw-r--r--contrib/libs/pire/pire/scanner_io.cpp30
-rw-r--r--contrib/libs/pire/pire/scanners/common.h14
-rw-r--r--contrib/libs/pire/pire/scanners/half_final.h8
-rw-r--r--contrib/libs/pire/pire/scanners/loaded.h34
-rw-r--r--contrib/libs/pire/pire/scanners/multi.h24
-rw-r--r--library/cpp/regex/hyperscan/hyperscan.cpp352
-rw-r--r--library/cpp/regex/hyperscan/hyperscan.h200
-rw-r--r--library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp224
-rw-r--r--library/cpp/regex/hyperscan/ya.make4
13 files changed, 877 insertions, 877 deletions
diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp
index 468ff61d92..27de4e3106 100644
--- a/contrib/libs/pire/pire/extra/count.cpp
+++ b/contrib/libs/pire/pire/extra/count.cpp
@@ -837,9 +837,9 @@ CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep)
BuildScanner(sq, *this);
}
-namespace Impl {
-template <class AdvancedScanner>
-AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) {
+namespace Impl {
+template <class AdvancedScanner>
+AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple) {
Impl::CountingFsm countingFsm{re, sep};
if (!countingFsm.Determine()) {
throw Error("regexp pattern too complicated");
@@ -852,31 +852,31 @@ AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool*
const auto& determined = countingFsm.Determined();
const auto& letters = countingFsm.Letters();
- AdvancedScanner scanner;
- scanner.Init(determined.Size(), letters, determined.Initial(), 1);
+ AdvancedScanner scanner;
+ scanner.Init(determined.Size(), letters, determined.Initial(), 1);
for (size_t from = 0; from != determined.Size(); ++from) {
for (auto&& lettersEl : letters) {
const auto letter = lettersEl.first;
const auto& tos = determined.Destinations(from, letter);
Y_ASSERT(tos.size() == 1);
- scanner.SetJump(from, letter, *tos.begin(), scanner.RemapAction(countingFsm.Output(from, letter)));
+ scanner.SetJump(from, letter, *tos.begin(), scanner.RemapAction(countingFsm.Output(from, letter)));
}
}
- return scanner;
-}
-} // namespace Impl
-
-AdvancedCountingScanner::AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple)
- : AdvancedCountingScanner(Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(re, sep, simple))
-{
+ return scanner;
}
-
-NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple)
- : NoGlueLimitCountingScanner(Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(re, sep, simple))
-{
-}
-
-
+} // namespace Impl
+
+AdvancedCountingScanner::AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple)
+ : AdvancedCountingScanner(Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(re, sep, simple))
+{
+}
+
+NoGlueLimitCountingScanner::NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple)
+ : NoGlueLimitCountingScanner(Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(re, sep, simple))
+{
+}
+
+
namespace Impl {
template<class Scanner>
@@ -908,7 +908,7 @@ public:
Action(this->Lhs(), States[from].first, letter) | (Action(this->Rhs(), States[from].second, letter) << this->Lhs().RegexpsCount()));
}
-protected:
+protected:
TVector<State> States;
TAction Action(const Scanner& sc, InternalState state, Char letter) const
{
@@ -919,74 +919,74 @@ protected:
}
};
-class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> {
-public:
- using ActionIndex = NoGlueLimitCountingScanner::ActionIndex;
- struct TGlueAction {
- TVector<ActionIndex> resets;
- TVector<ActionIndex> increments;
- bool operator<(const TGlueAction& rhs) const {
- return std::tie(increments, resets) < std::tie(rhs.increments, rhs.resets);
- }
- };
- using TGlueMap = TMap<TGlueAction, ActionIndex>;
-
- NoGlueLimitCountingScannerGlueTask(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs)
- : CountingScannerGlueTask(lhs, rhs)
- {
- }
-
- void Connect(size_t from, size_t to, Char letter)
- {
- TGlueAction glue_action;
- this->Lhs().GetActions(Action(this->Lhs(), States[from].first, letter), 0,
- std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments));
- this->Rhs().GetActions(Action(this->Rhs(), States[from].second, letter), this->Lhs().RegexpsCount(),
- std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments));
- Y_ASSERT(
- std::is_sorted(glue_action.increments.begin(), glue_action.increments.end()) &&
- std::is_sorted(glue_action.resets.begin(), glue_action.resets.end())
- );
-
- if (glue_action.increments.empty() && glue_action.resets.empty()) {
- this->Sc().SetJump(from, letter, to, 0);
- return;
- }
-
- auto action_iter = glue_map_.find(glue_action);
- if (action_iter == glue_map_.end()) {
- glue_map_[glue_action] = glue_actions_.size();
- for (const auto& ids : {glue_action.resets, glue_action.increments}) {
- glue_actions_.push_back(ids.size());
- std::copy(ids.begin(), ids.end(), std::back_inserter(glue_actions_));
- }
- }
-
- this->Sc().SetJump(from, letter, to, glue_map_[glue_action]);
- }
-
- // Return type is same as in parent class
- // TODO: Maybe return by value to use move semantic?
- const NoGlueLimitCountingScanner& Success()
- {
- glue_actions_[0] = glue_actions_.size();
- Sc().AcceptActions(glue_actions_);
- return Sc();
- }
-
-private:
- TGlueMap glue_map_;
- TVector<ActionIndex> glue_actions_ = {1};
-};
-
-
+class NoGlueLimitCountingScannerGlueTask : public CountingScannerGlueTask<NoGlueLimitCountingScanner> {
+public:
+ using ActionIndex = NoGlueLimitCountingScanner::ActionIndex;
+ struct TGlueAction {
+ TVector<ActionIndex> resets;
+ TVector<ActionIndex> increments;
+ bool operator<(const TGlueAction& rhs) const {
+ return std::tie(increments, resets) < std::tie(rhs.increments, rhs.resets);
+ }
+ };
+ using TGlueMap = TMap<TGlueAction, ActionIndex>;
+
+ NoGlueLimitCountingScannerGlueTask(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs)
+ : CountingScannerGlueTask(lhs, rhs)
+ {
+ }
+
+ void Connect(size_t from, size_t to, Char letter)
+ {
+ TGlueAction glue_action;
+ this->Lhs().GetActions(Action(this->Lhs(), States[from].first, letter), 0,
+ std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments));
+ this->Rhs().GetActions(Action(this->Rhs(), States[from].second, letter), this->Lhs().RegexpsCount(),
+ std::back_inserter(glue_action.resets), std::back_inserter(glue_action.increments));
+ Y_ASSERT(
+ std::is_sorted(glue_action.increments.begin(), glue_action.increments.end()) &&
+ std::is_sorted(glue_action.resets.begin(), glue_action.resets.end())
+ );
+
+ if (glue_action.increments.empty() && glue_action.resets.empty()) {
+ this->Sc().SetJump(from, letter, to, 0);
+ return;
+ }
+
+ auto action_iter = glue_map_.find(glue_action);
+ if (action_iter == glue_map_.end()) {
+ glue_map_[glue_action] = glue_actions_.size();
+ for (const auto& ids : {glue_action.resets, glue_action.increments}) {
+ glue_actions_.push_back(ids.size());
+ std::copy(ids.begin(), ids.end(), std::back_inserter(glue_actions_));
+ }
+ }
+
+ this->Sc().SetJump(from, letter, to, glue_map_[glue_action]);
+ }
+
+ // Return type is same as in parent class
+ // TODO: Maybe return by value to use move semantic?
+ const NoGlueLimitCountingScanner& Success()
+ {
+ glue_actions_[0] = glue_actions_.size();
+ Sc().AcceptActions(glue_actions_);
+ return Sc();
+ }
+
+private:
+ TGlueMap glue_map_;
+ TVector<ActionIndex> glue_actions_ = {1};
+};
+
+
}
CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const CountingScanner& rhs, size_t maxSize /* = 0 */)
{
- if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
- return CountingScanner();
- }
+ if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
+ return CountingScanner();
+ }
static constexpr size_t DefMaxSize = 250000;
Impl::CountingScannerGlueTask<CountingScanner> task(lhs, rhs);
return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
@@ -994,75 +994,75 @@ CountingScanner CountingScanner::Glue(const CountingScanner& lhs, const Counting
AdvancedCountingScanner AdvancedCountingScanner::Glue(const AdvancedCountingScanner& lhs, const AdvancedCountingScanner& rhs, size_t maxSize /* = 0 */)
{
- if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
- return AdvancedCountingScanner();
- }
+ if (lhs.RegexpsCount() + rhs.RegexpsCount() > MAX_RE_COUNT) {
+ return AdvancedCountingScanner();
+ }
static constexpr size_t DefMaxSize = 250000;
Impl::CountingScannerGlueTask<AdvancedCountingScanner> task(lhs, rhs);
return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
}
-NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */)
-{
- static constexpr size_t DefMaxSize = 250000;
- Impl::NoGlueLimitCountingScannerGlueTask task(lhs, rhs);
- return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
-}
-
-// Should Save(), Load() and Mmap() functions return stream/pointer in aligned state?
-// Now they don't because tests don't require it.
-void NoGlueLimitCountingScanner::Save(yostream* s) const {
- Y_ASSERT(!AdvancedScannerCompatibilityMode);
- LoadedScanner::Save(s, ScannerIOTypes::NoGlueLimitCountingScanner);
- if (Actions) {
- SavePodArray(s, Actions, *Actions);
- } else {
- const ActionIndex zeroSize = 0;
- SavePodType(s, zeroSize);
- }
-}
-
-void NoGlueLimitCountingScanner::Load(yistream* s) {
- ui32 type;
- LoadedScanner::Load(s, &type);
- ActionIndex actionsSize;
- if (type == ScannerIOTypes::NoGlueLimitCountingScanner) {
- LoadPodType(s, actionsSize);
-
- if (actionsSize == 0) {
- ActionsBuffer.reset();
- Actions = nullptr;
- } else {
- ActionsBuffer = TActionsBuffer(new ActionIndex[actionsSize]);
- ActionsBuffer[0] = actionsSize;
- LoadPodArray(s, &ActionsBuffer[1], actionsSize - 1);
- Actions = ActionsBuffer.get();
- }
- } else {
- Y_ASSERT(type == ScannerIOTypes::LoadedScanner);
- AdvancedScannerCompatibilityMode = true;
- }
-}
-
-const void* NoGlueLimitCountingScanner::Mmap(const void* ptr, size_t size) {
- NoGlueLimitCountingScanner scanner;
- ui32 type;
- auto p = static_cast<const size_t*> (scanner.LoadedScanner::Mmap(ptr, size, &type));
-
- if (type == ScannerIOTypes::NoGlueLimitCountingScanner) {
- scanner.Actions = reinterpret_cast<const ActionIndex*>(p);
- if (*scanner.Actions == 0) {
- scanner.Actions = nullptr;
- Impl::AdvancePtr(p, size, sizeof(ActionIndex));
- } else {
- Impl::AdvancePtr(p, size, *scanner.Actions * sizeof(ActionIndex));
- }
- } else {
- Y_ASSERT(type == ScannerIOTypes::LoadedScanner);
- scanner.AdvancedScannerCompatibilityMode = true;
- }
- Swap(scanner);
- return static_cast<const void*>(p);
-}
-
+NoGlueLimitCountingScanner NoGlueLimitCountingScanner::Glue(const NoGlueLimitCountingScanner& lhs, const NoGlueLimitCountingScanner& rhs, size_t maxSize /* = 0 */)
+{
+ static constexpr size_t DefMaxSize = 250000;
+ Impl::NoGlueLimitCountingScannerGlueTask task(lhs, rhs);
+ return Impl::Determine(task, maxSize ? maxSize : DefMaxSize);
}
+
+// Should Save(), Load() and Mmap() functions return stream/pointer in aligned state?
+// Now they don't because tests don't require it.
+void NoGlueLimitCountingScanner::Save(yostream* s) const {
+ Y_ASSERT(!AdvancedScannerCompatibilityMode);
+ LoadedScanner::Save(s, ScannerIOTypes::NoGlueLimitCountingScanner);
+ if (Actions) {
+ SavePodArray(s, Actions, *Actions);
+ } else {
+ const ActionIndex zeroSize = 0;
+ SavePodType(s, zeroSize);
+ }
+}
+
+void NoGlueLimitCountingScanner::Load(yistream* s) {
+ ui32 type;
+ LoadedScanner::Load(s, &type);
+ ActionIndex actionsSize;
+ if (type == ScannerIOTypes::NoGlueLimitCountingScanner) {
+ LoadPodType(s, actionsSize);
+
+ if (actionsSize == 0) {
+ ActionsBuffer.reset();
+ Actions = nullptr;
+ } else {
+ ActionsBuffer = TActionsBuffer(new ActionIndex[actionsSize]);
+ ActionsBuffer[0] = actionsSize;
+ LoadPodArray(s, &ActionsBuffer[1], actionsSize - 1);
+ Actions = ActionsBuffer.get();
+ }
+ } else {
+ Y_ASSERT(type == ScannerIOTypes::LoadedScanner);
+ AdvancedScannerCompatibilityMode = true;
+ }
+}
+
+const void* NoGlueLimitCountingScanner::Mmap(const void* ptr, size_t size) {
+ NoGlueLimitCountingScanner scanner;
+ ui32 type;
+ auto p = static_cast<const size_t*> (scanner.LoadedScanner::Mmap(ptr, size, &type));
+
+ if (type == ScannerIOTypes::NoGlueLimitCountingScanner) {
+ scanner.Actions = reinterpret_cast<const ActionIndex*>(p);
+ if (*scanner.Actions == 0) {
+ scanner.Actions = nullptr;
+ Impl::AdvancePtr(p, size, sizeof(ActionIndex));
+ } else {
+ Impl::AdvancePtr(p, size, *scanner.Actions * sizeof(ActionIndex));
+ }
+ } else {
+ Y_ASSERT(type == ScannerIOTypes::LoadedScanner);
+ scanner.AdvancedScannerCompatibilityMode = true;
+ }
+ Swap(scanner);
+ return static_cast<const void*>(p);
+}
+
+}
diff --git a/contrib/libs/pire/pire/extra/count.h b/contrib/libs/pire/pire/extra/count.h
index bd1526b98d..7a67f64e28 100644
--- a/contrib/libs/pire/pire/extra/count.h
+++ b/contrib/libs/pire/pire/extra/count.h
@@ -27,8 +27,8 @@
#include <contrib/libs/pire/pire/scanners/loaded.h>
#include <contrib/libs/pire/pire/fsm.h>
-#include <algorithm>
-
+#include <algorithm>
+
namespace Pire {
class Fsm;
@@ -38,11 +38,11 @@ namespace Impl {
template<class T>
class CountingScannerGlueTask;
-
- class NoGlueLimitCountingScannerGlueTask;
-
- template <class AdvancedScanner>
- AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple);
+
+ class NoGlueLimitCountingScannerGlueTask;
+
+ template <class AdvancedScanner>
+ AdvancedScanner MakeAdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple);
};
template<size_t I>
@@ -115,7 +115,7 @@ public:
* given regexp separated by another regexp
* in input text.
*/
-template<class DerivedScanner, class State>
+template<class DerivedScanner, class State>
class BaseCountingScanner: public LoadedScanner {
public:
enum {
@@ -137,7 +137,7 @@ public:
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
void TakeAction(State& s, Action a) const
{
- static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a);
+ static_cast<const DerivedScanner*>(this)->template TakeActionImpl<MAX_RE_COUNT>(s, a);
}
bool CanStop(const State&) const { return false; }
@@ -203,41 +203,41 @@ protected:
}
};
-template <size_t MAX_RE_COUNT>
-class CountingState {
-public:
- size_t Result(int i) const { return ymax(m_current[i], m_total[i]); }
-private:
- using InternalState = LoadedScanner::InternalState;
- InternalState m_state;
- ui32 m_current[MAX_RE_COUNT];
- ui32 m_total[MAX_RE_COUNT];
- size_t m_updatedMask;
-
- template <class DerivedScanner, class State>
- friend class BaseCountingScanner;
-
- template<size_t I>
- friend class IncrementPerformer;
-
- template<size_t I>
- friend class ResetPerformer;
-
-#ifdef PIRE_DEBUG
- friend yostream& operator << (yostream& s, const State& state)
- {
- s << state.m_state << " ( ";
- for (size_t i = 0; i < MAX_RE_COUNT; ++i)
- s << state.m_current[i] << '/' << state.m_total[i] << ' ';
- return s << ')';
- }
-#endif
-};
-
-
-class CountingScanner : public BaseCountingScanner<CountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> {
+template <size_t MAX_RE_COUNT>
+class CountingState {
public:
- using State = CountingState<MAX_RE_COUNT>;
+ size_t Result(int i) const { return ymax(m_current[i], m_total[i]); }
+private:
+ using InternalState = LoadedScanner::InternalState;
+ InternalState m_state;
+ ui32 m_current[MAX_RE_COUNT];
+ ui32 m_total[MAX_RE_COUNT];
+ size_t m_updatedMask;
+
+ template <class DerivedScanner, class State>
+ friend class BaseCountingScanner;
+
+ template<size_t I>
+ friend class IncrementPerformer;
+
+ template<size_t I>
+ friend class ResetPerformer;
+
+#ifdef PIRE_DEBUG
+ friend yostream& operator << (yostream& s, const State& state)
+ {
+ s << state.m_state << " ( ";
+ for (size_t i = 0; i < MAX_RE_COUNT; ++i)
+ s << state.m_current[i] << '/' << state.m_total[i] << ' ';
+ return s << ')';
+ }
+#endif
+};
+
+
+class CountingScanner : public BaseCountingScanner<CountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> {
+public:
+ using State = CountingState<MAX_RE_COUNT>;
enum {
Matched = 2,
};
@@ -273,10 +273,10 @@ private:
friend class Impl::CountingScannerGlueTask<CountingScanner>;
};
-class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> {
+class AdvancedCountingScanner : public BaseCountingScanner<AdvancedCountingScanner, CountingState<LoadedScanner::MAX_RE_COUNT>> {
public:
- using State = CountingState<MAX_RE_COUNT>;
-
+ using State = CountingState<MAX_RE_COUNT>;
+
AdvancedCountingScanner() {}
AdvancedCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr);
@@ -309,196 +309,196 @@ private:
friend class Impl::ScannerGlueCommon<AdvancedCountingScanner>;
friend class Impl::CountingScannerGlueTask<AdvancedCountingScanner>;
- friend AdvancedCountingScanner Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(const Fsm&, const Fsm&, bool*);
-};
-
-class NoGlueLimitCountingState {
-public:
- size_t Result(int i) const { return ymax(m_current[i], m_total[i]); }
- void Initialize(size_t initial, size_t regexpsCount) {
- m_state = initial;
- m_current.assign(regexpsCount, 0);
- m_total.assign(regexpsCount, 0);
- }
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- void Reset(size_t regexpId) {
- m_current[regexpId] = 0;
- }
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- void Increment(size_t regexp_id) {
- ++m_current[regexp_id];
- m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]);
- }
-
- template<size_t I>
- friend class IncrementPerformer;
-
- template<size_t I>
- friend class ResetPerformer;
-
-private:
- LoadedScanner::InternalState m_state;
- TVector<ui32> m_current;
- TVector<ui32> m_total;
-
- template <class DerivedScanner, class State>
- friend class BaseCountingScanner;
-
-#ifdef PIRE_DEBUG
- yostream& operator << (yostream& s, const State& state)
- {
- s << state.m_state << " ( ";
- for (size_t i = 0; i < state.m_current.size(); ++i)
- s << state.m_current[i] << '/' << state.m_total[i] << ' ';
- return s << ')';
- }
-#endif
+ friend AdvancedCountingScanner Impl::MakeAdvancedCountingScanner<AdvancedCountingScanner>(const Fsm&, const Fsm&, bool*);
};
-
-class NoGlueLimitCountingScanner : public BaseCountingScanner<NoGlueLimitCountingScanner, NoGlueLimitCountingState> {
-public:
- using State = NoGlueLimitCountingState;
- using ActionIndex = ui32;
- using TActionsBuffer = std::unique_ptr<ActionIndex[]>;
-
-private:
- TActionsBuffer ActionsBuffer;
- const ActionIndex* Actions = nullptr;
- bool AdvancedScannerCompatibilityMode = false;
-
-public:
- NoGlueLimitCountingScanner() = default;
- NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr);
- NoGlueLimitCountingScanner(const NoGlueLimitCountingScanner& rhs)
- : BaseCountingScanner(rhs)
- , AdvancedScannerCompatibilityMode(rhs.AdvancedScannerCompatibilityMode)
- {
- if (rhs.ActionsBuffer) {
- Y_ASSERT(rhs.Actions);
- ActionsBuffer = TActionsBuffer(new ActionIndex [*rhs.Actions]);
- std::copy_n(rhs.ActionsBuffer.get(), *rhs.Actions, ActionsBuffer.get());
- Actions = ActionsBuffer.get();
- } else {
- Actions = rhs.Actions;
- }
- }
-
- NoGlueLimitCountingScanner(NoGlueLimitCountingScanner&& other) : BaseCountingScanner() {
- Swap(other);
- }
-
- NoGlueLimitCountingScanner& operator=(NoGlueLimitCountingScanner rhs) {
- Swap(rhs);
- return *this;
- }
-
- void Swap(NoGlueLimitCountingScanner& s) {
- LoadedScanner::Swap(s);
- DoSwap(ActionsBuffer, s.ActionsBuffer);
- DoSwap(Actions, s.Actions);
- DoSwap(AdvancedScannerCompatibilityMode, s.AdvancedScannerCompatibilityMode);
- }
-
- void Initialize(State& state) const
- {
- state.Initialize(m.initial, RegexpsCount());
- }
-
- template <size_t ActualReCount>
- PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
- void TakeActionImpl(State& s, Action a) const
- {
- if (!a) {
- return;
- }
- if (AdvancedScannerCompatibilityMode) {
- AdvancedScannerTakeActionImpl<ActualReCount>(s, a);
- return;
- }
- // Note: it's important to perform resets before increments,
- // as it's possible for one repetition group to stop and another begin at the same symbol
- if (Actions) {
- auto action = Actions + a;
- for (auto reset_count = *action++; reset_count--;) {
- s.Reset(*action++);
- }
- for (auto inc_count = *action++; inc_count--;) {
- s.Increment(*action++);
- }
- } else {
- Y_ASSERT(RegexpsCount() == 1);
- if (a & ResetAction) {
- s.Reset(0);
- }
- if (a & IncrementAction) {
- s.Increment(0);
- }
- }
- }
-
- void Save(yostream* s) const;
-
- void Load(yistream* s);
-
- const void* Mmap(const void* ptr, size_t size);
-
- static NoGlueLimitCountingScanner Glue(const NoGlueLimitCountingScanner& a, const NoGlueLimitCountingScanner& b, size_t maxSize = 0);
-
-private:
- Action RemapAction(Action action)
- {
- return action;
- }
-
- template <class Iterator>
- void GetActions(Action a, ActionIndex id_shift, Iterator output_resets, Iterator output_increments) const {
- if (!a) {
- return;
- }
- if (!Actions) {
- if (a & ResetAction) {
- *output_resets++ = id_shift;
- }
- if (a & NoGlueLimitCountingScanner::IncrementAction) {
- *output_increments++ = id_shift;
- }
- return;
- }
- auto action = Actions + a;
- for (auto output : {output_resets, output_increments}) {
- for (auto count = *action++; count--;) {
- *output++ = *action++ + id_shift;
- }
- }
- }
-
- void AcceptActions(const TVector<ActionIndex>& actions) {
- Y_ASSERT(!Actions);
- Y_ASSERT(!actions.empty());
- Y_ASSERT(actions[0] == actions.size());
-
- ActionsBuffer = TActionsBuffer(new ActionIndex[actions.size()]);
- std::copy(actions.begin(), actions.end(), ActionsBuffer.get());
- Actions = ActionsBuffer.get();
- }
-
- template <size_t ActualReCount>
- void AdvancedScannerTakeActionImpl(State& s, Action a) const {
- if (a & ResetMask) {
- ResetPerformer<ActualReCount>::Do(s, a);
- }
- if (a & IncrementMask) {
- IncrementPerformer<ActualReCount>::Do(s, a);
- }
- }
-
- friend class Impl::ScannerGlueCommon<NoGlueLimitCountingScanner>;
- friend class Impl::CountingScannerGlueTask<NoGlueLimitCountingScanner>;
- friend class Impl::NoGlueLimitCountingScannerGlueTask;
- friend NoGlueLimitCountingScanner Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(const Fsm&, const Fsm&, bool*);
-};
-
-}
-
+class NoGlueLimitCountingState {
+public:
+ size_t Result(int i) const { return ymax(m_current[i], m_total[i]); }
+ void Initialize(size_t initial, size_t regexpsCount) {
+ m_state = initial;
+ m_current.assign(regexpsCount, 0);
+ m_total.assign(regexpsCount, 0);
+ }
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ void Reset(size_t regexpId) {
+ m_current[regexpId] = 0;
+ }
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ void Increment(size_t regexp_id) {
+ ++m_current[regexp_id];
+ m_total[regexp_id] = ymax(m_total[regexp_id], m_current[regexp_id]);
+ }
+
+ template<size_t I>
+ friend class IncrementPerformer;
+
+ template<size_t I>
+ friend class ResetPerformer;
+
+private:
+ LoadedScanner::InternalState m_state;
+ TVector<ui32> m_current;
+ TVector<ui32> m_total;
+
+ template <class DerivedScanner, class State>
+ friend class BaseCountingScanner;
+
+#ifdef PIRE_DEBUG
+ yostream& operator << (yostream& s, const State& state)
+ {
+ s << state.m_state << " ( ";
+ for (size_t i = 0; i < state.m_current.size(); ++i)
+ s << state.m_current[i] << '/' << state.m_total[i] << ' ';
+ return s << ')';
+ }
#endif
+};
+
+
+class NoGlueLimitCountingScanner : public BaseCountingScanner<NoGlueLimitCountingScanner, NoGlueLimitCountingState> {
+public:
+ using State = NoGlueLimitCountingState;
+ using ActionIndex = ui32;
+ using TActionsBuffer = std::unique_ptr<ActionIndex[]>;
+
+private:
+ TActionsBuffer ActionsBuffer;
+ const ActionIndex* Actions = nullptr;
+ bool AdvancedScannerCompatibilityMode = false;
+
+public:
+ NoGlueLimitCountingScanner() = default;
+ NoGlueLimitCountingScanner(const Fsm& re, const Fsm& sep, bool* simple = nullptr);
+ NoGlueLimitCountingScanner(const NoGlueLimitCountingScanner& rhs)
+ : BaseCountingScanner(rhs)
+ , AdvancedScannerCompatibilityMode(rhs.AdvancedScannerCompatibilityMode)
+ {
+ if (rhs.ActionsBuffer) {
+ Y_ASSERT(rhs.Actions);
+ ActionsBuffer = TActionsBuffer(new ActionIndex [*rhs.Actions]);
+ std::copy_n(rhs.ActionsBuffer.get(), *rhs.Actions, ActionsBuffer.get());
+ Actions = ActionsBuffer.get();
+ } else {
+ Actions = rhs.Actions;
+ }
+ }
+
+ NoGlueLimitCountingScanner(NoGlueLimitCountingScanner&& other) : BaseCountingScanner() {
+ Swap(other);
+ }
+
+ NoGlueLimitCountingScanner& operator=(NoGlueLimitCountingScanner rhs) {
+ Swap(rhs);
+ return *this;
+ }
+
+ void Swap(NoGlueLimitCountingScanner& s) {
+ LoadedScanner::Swap(s);
+ DoSwap(ActionsBuffer, s.ActionsBuffer);
+ DoSwap(Actions, s.Actions);
+ DoSwap(AdvancedScannerCompatibilityMode, s.AdvancedScannerCompatibilityMode);
+ }
+
+ void Initialize(State& state) const
+ {
+ state.Initialize(m.initial, RegexpsCount());
+ }
+
+ template <size_t ActualReCount>
+ PIRE_FORCED_INLINE PIRE_HOT_FUNCTION
+ void TakeActionImpl(State& s, Action a) const
+ {
+ if (!a) {
+ return;
+ }
+ if (AdvancedScannerCompatibilityMode) {
+ AdvancedScannerTakeActionImpl<ActualReCount>(s, a);
+ return;
+ }
+ // Note: it's important to perform resets before increments,
+ // as it's possible for one repetition group to stop and another begin at the same symbol
+ if (Actions) {
+ auto action = Actions + a;
+ for (auto reset_count = *action++; reset_count--;) {
+ s.Reset(*action++);
+ }
+ for (auto inc_count = *action++; inc_count--;) {
+ s.Increment(*action++);
+ }
+ } else {
+ Y_ASSERT(RegexpsCount() == 1);
+ if (a & ResetAction) {
+ s.Reset(0);
+ }
+ if (a & IncrementAction) {
+ s.Increment(0);
+ }
+ }
+ }
+
+ void Save(yostream* s) const;
+
+ void Load(yistream* s);
+
+ const void* Mmap(const void* ptr, size_t size);
+
+ static NoGlueLimitCountingScanner Glue(const NoGlueLimitCountingScanner& a, const NoGlueLimitCountingScanner& b, size_t maxSize = 0);
+
+private:
+ Action RemapAction(Action action)
+ {
+ return action;
+ }
+
+ template <class Iterator>
+ void GetActions(Action a, ActionIndex id_shift, Iterator output_resets, Iterator output_increments) const {
+ if (!a) {
+ return;
+ }
+ if (!Actions) {
+ if (a & ResetAction) {
+ *output_resets++ = id_shift;
+ }
+ if (a & NoGlueLimitCountingScanner::IncrementAction) {
+ *output_increments++ = id_shift;
+ }
+ return;
+ }
+ auto action = Actions + a;
+ for (auto output : {output_resets, output_increments}) {
+ for (auto count = *action++; count--;) {
+ *output++ = *action++ + id_shift;
+ }
+ }
+ }
+
+ void AcceptActions(const TVector<ActionIndex>& actions) {
+ Y_ASSERT(!Actions);
+ Y_ASSERT(!actions.empty());
+ Y_ASSERT(actions[0] == actions.size());
+
+ ActionsBuffer = TActionsBuffer(new ActionIndex[actions.size()]);
+ std::copy(actions.begin(), actions.end(), ActionsBuffer.get());
+ Actions = ActionsBuffer.get();
+ }
+
+ template <size_t ActualReCount>
+ void AdvancedScannerTakeActionImpl(State& s, Action a) const {
+ if (a & ResetMask) {
+ ResetPerformer<ActualReCount>::Do(s, a);
+ }
+ if (a & IncrementMask) {
+ IncrementPerformer<ActualReCount>::Do(s, a);
+ }
+ }
+
+ friend class Impl::ScannerGlueCommon<NoGlueLimitCountingScanner>;
+ friend class Impl::CountingScannerGlueTask<NoGlueLimitCountingScanner>;
+ friend class Impl::NoGlueLimitCountingScannerGlueTask;
+ friend NoGlueLimitCountingScanner Impl::MakeAdvancedCountingScanner<NoGlueLimitCountingScanner>(const Fsm&, const Fsm&, bool*);
+};
+
+}
+
+#endif
diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp
index 984d708dfa..27bfd91522 100644
--- a/contrib/libs/pire/pire/fsm.cpp
+++ b/contrib/libs/pire/pire/fsm.cpp
@@ -612,7 +612,7 @@ Fsm& Fsm::Reverse()
out.Connect(j, from, i.first);
// Invert initial and final states
- out.m_final.clear();
+ out.m_final.clear();
out.SetFinal(initial, true);
for (auto i : m_final)
out.Connect(Size(), i, Epsilon);
diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h
index f6e1ff734d..6ce6c9780e 100644
--- a/contrib/libs/pire/pire/run.h
+++ b/contrib/libs/pire/pire/run.h
@@ -31,7 +31,7 @@
#include "platform.h"
#include "defs.h"
-#include <string>
+#include <string>
namespace Pire {
@@ -283,67 +283,67 @@ void Run(const Scanner& sc, typename Scanner::State& st, const char* begin, cons
Run(sc, st, TStringBuf(begin, end));
}
-/// Returns default constructed string_view{} if there is no matching prefix
-/// Returns str.substr(0, 0) if matching prefix is empty
+/// Returns default constructed string_view{} if there is no matching prefix
+/// Returns str.substr(0, 0) if matching prefix is empty
template<class Scanner>
-std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
+std::string_view LongestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
{
typename Scanner::State st;
sc.Initialize(st);
if (throughBeginMark)
Pire::Step(sc, st, BeginMark);
- const char* pos = (sc.Final(st) ? str.data() : nullptr);
+ const char* pos = (sc.Final(st) ? str.data() : nullptr);
Impl::DoRun(sc, st, str, Impl::LongestPrefixPred<Scanner>(pos));
if (throughEndMark) {
Pire::Step(sc, st, EndMark);
if (sc.Final(st))
- pos = str.data() + str.size();
+ pos = str.data() + str.size();
}
- return pos ? str.substr(0, pos - str.data()) : std::string_view{};
+ return pos ? str.substr(0, pos - str.data()) : std::string_view{};
}
template<class Scanner>
const char* LongestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false)
{
- auto prefix = LongestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark);
- return prefix.data() + prefix.size();
+ auto prefix = LongestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark);
+ return prefix.data() + prefix.size();
}
-/// Returns default constructed string_view{} if there is no matching prefix
-/// Returns str.substr(0, 0) if matching prefix is empty
+/// Returns default constructed string_view{} if there is no matching prefix
+/// Returns str.substr(0, 0) if matching prefix is empty
template<class Scanner>
-std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
+std::string_view ShortestPrefix(const Scanner& sc, std::string_view str, bool throughBeginMark = false, bool throughEndMark = false)
{
typename Scanner::State st;
sc.Initialize(st);
if (throughBeginMark)
Pire::Step(sc, st, BeginMark);
if (sc.Final(st))
- return str.substr(0, 0);
- const char* pos = nullptr;
+ return str.substr(0, 0);
+ const char* pos = nullptr;
Impl::DoRun(sc, st, str, Impl::ShortestPrefixPred<Scanner>(pos));
if (throughEndMark) {
Pire::Step(sc, st, EndMark);
- if (sc.Final(st) && !pos)
- pos = str.data() + str.size();
+ if (sc.Final(st) && !pos)
+ pos = str.data() + str.size();
}
- return pos ? str.substr(0, pos - str.data()) : std::string_view{};
+ return pos ? str.substr(0, pos - str.data()) : std::string_view{};
}
template<class Scanner>
const char* ShortestPrefix(const Scanner& sc, const char* begin, const char* end, bool throughBeginMark = false, bool throughEndMark = false)
{
- auto prefix = ShortestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark);
- return prefix.data() + prefix.size();
+ auto prefix = ShortestPrefix(sc, std::string_view(begin, end - begin), throughBeginMark, throughEndMark);
+ return prefix.data() + prefix.size();
}
/// The same as above, but scans string in reverse direction
/// (consider using Fsm::Reverse() for using in this function).
-/// Returns default constructed string_view{} if there is no matching suffix
-/// Returns str.substr(str.size(), 0) if matching suffix is empty
+/// Returns default constructed string_view{} if there is no matching suffix
+/// Returns str.substr(str.size(), 0) if matching suffix is empty
template<class Scanner>
-inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
+inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
{
typename Scanner::State state;
scanner.Initialize(state);
@@ -352,38 +352,38 @@ inline std::string_view LongestSuffix(const Scanner& scanner, std::string_view s
PIRE_IFDEBUG(Cdbg << "Running LongestSuffix on string " << ystring(str) << Endl);
PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
- std::string_view suffix{};
- auto begin = str.data() + str.size();
- while (begin != str.data() && !scanner.Dead(state)) {
+ std::string_view suffix{};
+ auto begin = str.data() + str.size();
+ while (begin != str.data() && !scanner.Dead(state)) {
if (scanner.Final(state))
- suffix = str.substr(begin - str.data());
- --begin;
- Step(scanner, state, (unsigned char)*begin);
- PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl);
+ suffix = str.substr(begin - str.data());
+ --begin;
+ Step(scanner, state, (unsigned char)*begin);
+ PIRE_IFDEBUG(Cdbg << *begin << " => state " << StDump(scanner, state) << Endl);
}
if (scanner.Final(state))
- suffix = str.substr(begin - str.data());
+ suffix = str.substr(begin - str.data());
if (throughBeginMark) {
Step(scanner, state, BeginMark);
if (scanner.Final(state))
- suffix = str.substr(begin - str.data());
+ suffix = str.substr(begin - str.data());
}
- return suffix;
+ return suffix;
}
template<class Scanner>
inline const char* LongestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) {
- auto suffix = LongestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark);
- return suffix.data() ? suffix.data() - 1 : nullptr;
+ auto suffix = LongestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark);
+ return suffix.data() ? suffix.data() - 1 : nullptr;
}
/// The same as above, but scans string in reverse direction
-/// Returns default constructed string_view{} if there is no matching suffix
-/// Returns str.substr(str.size(), 0) if matching suffix is empty
+/// Returns default constructed string_view{} if there is no matching suffix
+/// Returns str.substr(str.size(), 0) if matching suffix is empty
template<class Scanner>
-inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
+inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view str, bool throughEndMark = false, bool throughBeginMark = false)
{
- auto begin = str.data() + str.size();
+ auto begin = str.data() + str.size();
typename Scanner::State state;
scanner.Initialize(state);
if (throughEndMark)
@@ -391,20 +391,20 @@ inline std::string_view ShortestSuffix(const Scanner& scanner, std::string_view
PIRE_IFDEBUG(Cdbg << "Running ShortestSuffix on string " << ystring(str) << Endl);
PIRE_IFDEBUG(Cdbg << "Initial state " << StDump(scanner, state) << Endl);
- while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) {
- --begin;
- scanner.Next(state, (unsigned char)*begin);
+ while (begin != str.data() && !scanner.Final(state) && !scanner.Dead(state)) {
+ --begin;
+ scanner.Next(state, (unsigned char)*begin);
PIRE_IFDEBUG(Cdbg << *rbegin << " => state " << StDump(scanner, state) << Endl);
}
if (throughBeginMark)
Step(scanner, state, BeginMark);
- return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{};
+ return scanner.Final(state) ? str.substr(begin - str.data()) : std::string_view{};
}
template<class Scanner>
inline const char* ShortestSuffix(const Scanner& scanner, const char* rbegin, const char* rend, bool throughEndMark = false, bool throughBeginMark = false) {
- auto suffix = ShortestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark);
- return suffix.data() ? suffix.data() - 1 : nullptr;
+ auto suffix = ShortestSuffix(scanner, std::string_view(rend + 1, rbegin - rend), throughEndMark, throughBeginMark);
+ return suffix.data() ? suffix.data() - 1 : nullptr;
}
diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp
index 3956e3c6ed..5c723ca427 100644
--- a/contrib/libs/pire/pire/scanner_io.cpp
+++ b/contrib/libs/pire/pire/scanner_io.cpp
@@ -169,14 +169,14 @@ void SlowScanner::Load(yistream* s)
Swap(sc);
}
-void LoadedScanner::Save(yostream* s) const {
- Save(s, ScannerIOTypes::LoadedScanner);
-}
-
-void LoadedScanner::Save(yostream* s, ui32 type) const
+void LoadedScanner::Save(yostream* s) const {
+ Save(s, ScannerIOTypes::LoadedScanner);
+}
+
+void LoadedScanner::Save(yostream* s, ui32 type) const
{
- Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner);
- SavePodType(s, Header(type, sizeof(m)));
+ Y_ASSERT(type == ScannerIOTypes::LoadedScanner || type == ScannerIOTypes::NoGlueLimitCountingScanner);
+ SavePodType(s, Header(type, sizeof(m)));
Impl::AlignSave(s, sizeof(Header));
Locals mc = m;
mc.initial -= reinterpret_cast<size_t>(m_jumps);
@@ -188,17 +188,17 @@ void LoadedScanner::Save(yostream* s, ui32 type) const
Impl::AlignedSaveArray(s, m_tags, m.statesCount);
}
-void LoadedScanner::Load(yistream* s) {
- Load(s, nullptr);
-}
-
-void LoadedScanner::Load(yistream* s, ui32* type)
+void LoadedScanner::Load(yistream* s) {
+ Load(s, nullptr);
+}
+
+void LoadedScanner::Load(yistream* s, ui32* type)
{
LoadedScanner sc;
Header header = Impl::ValidateHeader(s, ScannerIOTypes::LoadedScanner, sizeof(sc.m));
- if (type) {
- *type = header.Type;
- }
+ if (type) {
+ *type = header.Type;
+ }
LoadPodType(s, sc.m);
Impl::AlignLoad(s, sizeof(sc.m));
sc.m_buffer = BufferType(new char[sc.BufSize()]);
diff --git a/contrib/libs/pire/pire/scanners/common.h b/contrib/libs/pire/pire/scanners/common.h
index de5ea0af7b..a92684cf3d 100644
--- a/contrib/libs/pire/pire/scanners/common.h
+++ b/contrib/libs/pire/pire/scanners/common.h
@@ -36,8 +36,8 @@ namespace Pire {
Scanner = 1,
SimpleScanner = 2,
SlowScanner = 3,
- LoadedScanner = 4,
- NoGlueLimitCountingScanner = 5,
+ LoadedScanner = 4,
+ NoGlueLimitCountingScanner = 5,
};
}
@@ -68,12 +68,12 @@ namespace Pire {
throw Error("Serialized regexp incompatible with your system");
if (Version != RE_VERSION && Version != RE_VERSION_WITH_MACTIONS)
throw Error("You are trying to used an incompatible version of a serialized regexp");
- if (type != ScannerIOTypes::NoScanner && type != Type &&
- !(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) {
- throw Error("Serialized regexp incompatible with your system");
- }
- if (hdrsize != 0 && HdrSize != hdrsize)
+ if (type != ScannerIOTypes::NoScanner && type != Type &&
+ !(type == ScannerIOTypes::LoadedScanner && Type == ScannerIOTypes::NoGlueLimitCountingScanner)) {
throw Error("Serialized regexp incompatible with your system");
+ }
+ if (hdrsize != 0 && HdrSize != hdrsize)
+ throw Error("Serialized regexp incompatible with your system");
}
};
diff --git a/contrib/libs/pire/pire/scanners/half_final.h b/contrib/libs/pire/pire/scanners/half_final.h
index 071c3414a2..1755114302 100644
--- a/contrib/libs/pire/pire/scanners/half_final.h
+++ b/contrib/libs/pire/pire/scanners/half_final.h
@@ -210,13 +210,13 @@ private:
void BuildFinals(const HalfFinalFsm& fsm) {
Y_ASSERT(Scanner::m_buffer);
Y_ASSERT(fsm.GetFsm().Size() == Scanner::Size());
- auto finalWriter = Scanner::m_final;
+ auto finalWriter = Scanner::m_final;
for (size_t state = 0; state < Scanner::Size(); ++state) {
- Scanner::m_finalIndex[state] = finalWriter - Scanner::m_final;
+ Scanner::m_finalIndex[state] = finalWriter - Scanner::m_final;
for (size_t i = 0; i < fsm.GetCount(state); i++) {
- *finalWriter++ = 0;
+ *finalWriter++ = 0;
}
- *finalWriter++ = static_cast<size_t>(-1);
+ *finalWriter++ = static_cast<size_t>(-1);
}
}
diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h
index 120dc403b7..ddc6a84b5b 100644
--- a/contrib/libs/pire/pire/scanners/loaded.h
+++ b/contrib/libs/pire/pire/scanners/loaded.h
@@ -103,13 +103,13 @@ protected:
}
LoadedScanner& operator = (const LoadedScanner& s) { LoadedScanner(s).Swap(*this); return *this; }
- LoadedScanner (LoadedScanner&& other) : LoadedScanner() {
- Swap(other);
- }
- LoadedScanner& operator=(LoadedScanner&& other) {
- Swap(other);
- return *this;
- }
+ LoadedScanner (LoadedScanner&& other) : LoadedScanner() {
+ Swap(other);
+ }
+ LoadedScanner& operator=(LoadedScanner&& other) {
+ Swap(other);
+ return *this;
+ }
public:
size_t Size() const { return m.statesCount; }
@@ -120,19 +120,19 @@ public:
size_t LettersCount() const { return m.lettersCount; }
- const void* Mmap(const void* ptr, size_t size) {
- return Mmap(ptr, size, nullptr);
- }
-
- const void* Mmap(const void* ptr, size_t size, ui32* type)
+ const void* Mmap(const void* ptr, size_t size) {
+ return Mmap(ptr, size, nullptr);
+ }
+
+ const void* Mmap(const void* ptr, size_t size, ui32* type)
{
Impl::CheckAlign(ptr);
LoadedScanner s;
const size_t* p = reinterpret_cast<const size_t*>(ptr);
Header header = Impl::ValidateHeader(p, size, ScannerIOTypes::LoadedScanner, sizeof(s.m));
- if (type) {
- *type = header.Type;
- }
+ if (type) {
+ *type = header.Type;
+ }
Locals* locals;
Impl::MapPtr(locals, 1, p, size);
@@ -152,9 +152,9 @@ public:
return (const void*) p;
}
- void Save(yostream*, ui32 type) const;
+ void Save(yostream*, ui32 type) const;
void Save(yostream*) const;
- void Load(yistream*, ui32* type);
+ void Load(yistream*, ui32* type);
void Load(yistream*);
template<class Eq>
diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h
index 29679e416e..b993808bf7 100644
--- a/contrib/libs/pire/pire/scanners/multi.h
+++ b/contrib/libs/pire/pire/scanners/multi.h
@@ -24,7 +24,7 @@
#ifndef PIRE_SCANNERS_MULTI_H
#define PIRE_SCANNERS_MULTI_H
-#include <cstring>
+#include <cstring>
#include <string.h>
#include <contrib/libs/pire/pire/approx_matching.h>
#include <contrib/libs/pire/pire/fsm.h>
@@ -348,7 +348,7 @@ protected:
template<class Eq>
void Init(size_t states, const Partition<Char, Eq>& letters, size_t finalStatesCount, size_t startState, size_t regexpsCount = 1)
{
- std::memset(&m, 0, sizeof(m));
+ std::memset(&m, 0, sizeof(m));
m.relocationSignature = Relocation::Signature;
m.shortcuttingSignature = Shortcutting::Signature;
m.statesCount = states;
@@ -407,7 +407,7 @@ protected:
m.relocationSignature = Relocation::Signature;
m.shortcuttingSignature = Shortcutting::Signature;
m_buffer = BufferType(new char[BufSize() + sizeof(size_t)]);
- std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t));
+ std::memset(m_buffer.Get(), 0, BufSize() + sizeof(size_t));
Markup(AlignUp(m_buffer.Get(), sizeof(size_t)));
// Values in letter-to-leterclass table take into account row header size
@@ -513,12 +513,12 @@ protected:
void FinishBuild()
{
Y_ASSERT(m_buffer);
- auto finalWriter = m_final;
+ auto finalWriter = m_final;
for (size_t state = 0; state != Size(); ++state) {
- m_finalIndex[state] = finalWriter - m_final;
+ m_finalIndex[state] = finalWriter - m_final;
if (Header(IndexToState(state)).Common.Flags & FinalFlag)
- *finalWriter++ = 0;
- *finalWriter++ = static_cast<size_t>(-1);
+ *finalWriter++ = 0;
+ *finalWriter++ = static_cast<size_t>(-1);
}
BuildShortcuts();
}
@@ -1022,12 +1022,12 @@ public:
this->SetSc(THolder<Scanner>(new Scanner));
Sc().Init(states.size(), Letters(), finalTableSize, size_t(0), Lhs().RegexpsCount() + Rhs().RegexpsCount());
- auto finalWriter = Sc().m_final;
+ auto finalWriter = Sc().m_final;
for (size_t state = 0; state != states.size(); ++state) {
- Sc().m_finalIndex[state] = finalWriter - Sc().m_final;
- finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter);
- finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter);
- *finalWriter++ = static_cast<size_t>(-1);
+ Sc().m_finalIndex[state] = finalWriter - Sc().m_final;
+ finalWriter = Shift(Lhs().AcceptedRegexps(states[state].first), 0, finalWriter);
+ finalWriter = Shift(Rhs().AcceptedRegexps(states[state].second), Lhs().RegexpsCount(), finalWriter);
+ *finalWriter++ = static_cast<size_t>(-1);
Sc().SetTag(state, ((Lhs().Final(states[state].first) || Rhs().Final(states[state].second)) ? Scanner::FinalFlag : 0)
| ((Lhs().Dead(states[state].first) && Rhs().Dead(states[state].second)) ? Scanner::DeadFlag : 0));
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
index ba321f9c29..82ca3880d1 100644
--- a/library/cpp/regex/hyperscan/hyperscan.cpp
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -17,201 +17,201 @@ namespace NHyperscan {
using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
namespace NPrivate {
- ERuntime DetectCurrentRuntime() {
+ ERuntime DetectCurrentRuntime() {
if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) {
- return ERuntime::AVX512;
+ return ERuntime::AVX512;
} else if (NX86::HaveAVX() && NX86::HaveAVX2()) {
- return ERuntime::AVX2;
+ return ERuntime::AVX2;
} else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) {
- return ERuntime::Corei7;
+ return ERuntime::Corei7;
} else {
- return ERuntime::Core2;
+ return ERuntime::Core2;
}
}
- TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
- switch (runtime) {
- default:
- Y_ASSERT(false);
+ TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
+ switch (runtime) {
+ default:
+ Y_ASSERT(false);
[[fallthrough]];
- case ERuntime::Core2:
- case ERuntime::Corei7:
- return 0;
- case ERuntime::AVX2:
- return CPU_FEATURES_AVX2;
- case ERuntime::AVX512:
- return CPU_FEATURES_AVX512;
- }
+ case ERuntime::Core2:
+ case ERuntime::Corei7:
+ return 0;
+ case ERuntime::AVX2:
+ return CPU_FEATURES_AVX2;
+ case ERuntime::AVX512:
+ return CPU_FEATURES_AVX512;
+ }
}
- hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
- hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
- return platformInfo;
+ hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
+ hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
+ return platformInfo;
}
-
+
hs_platform_info_t MakeCurrentPlatformInfo() {
return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
}
- TImpl::TImpl(ERuntime runtime) {
- switch (runtime) {
- default:
- Y_ASSERT(false);
+ TImpl::TImpl(ERuntime runtime) {
+ switch (runtime) {
+ default:
+ Y_ASSERT(false);
[[fallthrough]];
- case ERuntime::Core2:
- AllocScratch = core2_hs_alloc_scratch;
- Scan = core2_hs_scan;
- SerializeDatabase = core2_hs_serialize_database;
- DeserializeDatabase = core2_hs_deserialize_database;
- break;
- case ERuntime::Corei7:
- AllocScratch = corei7_hs_alloc_scratch;
- Scan = corei7_hs_scan;
- SerializeDatabase = corei7_hs_serialize_database;
- DeserializeDatabase = corei7_hs_deserialize_database;
- break;
- case ERuntime::AVX2:
- AllocScratch = avx2_hs_alloc_scratch;
- Scan = avx2_hs_scan;
- SerializeDatabase = avx2_hs_serialize_database;
- DeserializeDatabase = avx2_hs_deserialize_database;
- break;
- case ERuntime::AVX512:
- AllocScratch = avx512_hs_alloc_scratch;
- Scan = avx512_hs_scan;
- SerializeDatabase = avx512_hs_serialize_database;
- DeserializeDatabase = avx512_hs_deserialize_database;
- }
+ case ERuntime::Core2:
+ AllocScratch = core2_hs_alloc_scratch;
+ Scan = core2_hs_scan;
+ SerializeDatabase = core2_hs_serialize_database;
+ DeserializeDatabase = core2_hs_deserialize_database;
+ break;
+ case ERuntime::Corei7:
+ AllocScratch = corei7_hs_alloc_scratch;
+ Scan = corei7_hs_scan;
+ SerializeDatabase = corei7_hs_serialize_database;
+ DeserializeDatabase = corei7_hs_deserialize_database;
+ break;
+ case ERuntime::AVX2:
+ AllocScratch = avx2_hs_alloc_scratch;
+ Scan = avx2_hs_scan;
+ SerializeDatabase = avx2_hs_serialize_database;
+ DeserializeDatabase = avx2_hs_deserialize_database;
+ break;
+ case ERuntime::AVX512:
+ AllocScratch = avx512_hs_alloc_scratch;
+ Scan = avx512_hs_scan;
+ SerializeDatabase = avx512_hs_serialize_database;
+ DeserializeDatabase = avx512_hs_deserialize_database;
+ }
}
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
- hs_database_t* rawDb = nullptr;
- hs_compile_error_t* rawCompileErr = nullptr;
- hs_error_t status = hs_compile(
- regex.begin(),
- flags,
- HS_MODE_BLOCK,
- platform,
- &rawDb,
- &rawCompileErr);
- TDatabase db(rawDb);
- NHyperscan::TCompileError compileError(rawCompileErr);
- if (status != HS_SUCCESS) {
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
+ hs_database_t* rawDb = nullptr;
+ hs_compile_error_t* rawCompileErr = nullptr;
+ hs_error_t status = hs_compile(
+ regex.begin(),
+ flags,
+ HS_MODE_BLOCK,
+ platform,
+ &rawDb,
+ &rawCompileErr);
+ TDatabase db(rawDb);
+ NHyperscan::TCompileError compileError(rawCompileErr);
+ if (status != HS_SUCCESS) {
ythrow TCompileException()
- << "Failed to compile regex: " << regex << ". "
- << "Error message (hyperscan): " << compileError->message;
- }
- return db;
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- hs_platform_info_t* platform,
- const TVector<const hs_expr_ext_t*>* extendedParameters) {
- unsigned int count = regexs.size();
- if (flags.size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(flags) = " << flags.size() << ".";
- }
- if (ids.size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(ids) = " << ids.size() << ".";
- }
- if (extendedParameters && extendedParameters->size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(extendedParameters) = " << extendedParameters->size() << ".";
+ << "Failed to compile regex: " << regex << ". "
+ << "Error message (hyperscan): " << compileError->message;
}
- hs_database_t* rawDb = nullptr;
- hs_compile_error_t* rawCompileErr = nullptr;
- hs_error_t status = hs_compile_ext_multi(
- regexs.data(),
- flags.data(),
- ids.data(),
- extendedParameters ? extendedParameters->data() : nullptr,
- count,
- HS_MODE_BLOCK,
- platform,
- &rawDb,
- &rawCompileErr);
- TDatabase db(rawDb);
- NHyperscan::TCompileError compileError(rawCompileErr);
- if (status != HS_SUCCESS) {
- if (compileError->expression >= 0) {
- const char* regex = regexs[compileError->expression];
- ythrow TCompileException()
- << "Failed to compile regex: " << regex << ". "
- << "Error message (hyperscan): " << compileError->message;
- } else {
- ythrow TCompileException()
- << "Failed to compile multiple regexs. "
- << "Error message (hyperscan): " << compileError->message;
- }
- }
- return db;
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- const TImpl& impl) {
- bool result = false;
- auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
- result = true;
- return 1; // stop scan
- };
- Scan(
- db,
- scratch,
- text,
- callback,
- impl);
- return result;
+ return db;
}
- } // namespace NPrivate
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ hs_platform_info_t* platform,
+ const TVector<const hs_expr_ext_t*>* extendedParameters) {
+ unsigned int count = regexs.size();
+ if (flags.size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(flags) = " << flags.size() << ".";
+ }
+ if (ids.size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(ids) = " << ids.size() << ".";
+ }
+ if (extendedParameters && extendedParameters->size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(extendedParameters) = " << extendedParameters->size() << ".";
+ }
+ hs_database_t* rawDb = nullptr;
+ hs_compile_error_t* rawCompileErr = nullptr;
+ hs_error_t status = hs_compile_ext_multi(
+ regexs.data(),
+ flags.data(),
+ ids.data(),
+ extendedParameters ? extendedParameters->data() : nullptr,
+ count,
+ HS_MODE_BLOCK,
+ platform,
+ &rawDb,
+ &rawCompileErr);
+ TDatabase db(rawDb);
+ NHyperscan::TCompileError compileError(rawCompileErr);
+ if (status != HS_SUCCESS) {
+ if (compileError->expression >= 0) {
+ const char* regex = regexs[compileError->expression];
+ ythrow TCompileException()
+ << "Failed to compile regex: " << regex << ". "
+ << "Error message (hyperscan): " << compileError->message;
+ } else {
+ ythrow TCompileException()
+ << "Failed to compile multiple regexs. "
+ << "Error message (hyperscan): " << compileError->message;
+ }
+ }
+ return db;
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ const TImpl& impl) {
+ bool result = false;
+ auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
+ result = true;
+ return 1; // stop scan
+ };
+ Scan(
+ db,
+ scratch,
+ text,
+ callback,
+ impl);
+ return result;
+ }
+ } // namespace NPrivate
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
return NPrivate::Compile(regex, flags, &platformInfo);
}
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
- auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
- return NPrivate::Compile(regex, flags, &platformInfo);
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- const TVector<const hs_expr_ext_t*>* extendedParameters)
- {
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
+ auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+ return NPrivate::Compile(regex, flags, &platformInfo);
+ }
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ const TVector<const hs_expr_ext_t*>* extendedParameters)
+ {
auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- TCPUFeatures cpuFeatures,
- const TVector<const hs_expr_ext_t*>* extendedParameters)
- {
- auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
- return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
- }
-
+ }
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ TCPUFeatures cpuFeatures,
+ const TVector<const hs_expr_ext_t*>* extendedParameters)
+ {
+ auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+ return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
+ }
+
TScratch MakeScratch(const TDatabase& db) {
hs_scratch_t* rawScratch = nullptr;
- hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+ hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
NHyperscan::TScratch scratch(rawScratch);
if (status != HS_SUCCESS) {
ythrow yexception() << "Failed to make scratch for hyperscan database";
@@ -221,7 +221,7 @@ namespace NHyperscan {
void GrowScratch(TScratch& scratch, const TDatabase& db) {
hs_scratch_t* rawScratch = scratch.Get();
- hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+ hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
if (rawScratch != scratch.Get()) {
Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
scratch.Reset(rawScratch);
@@ -244,9 +244,9 @@ namespace NHyperscan {
bool Matches(
const TDatabase& db,
const TScratch& scratch,
- const TStringBuf& text)
- {
- return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
+ const TStringBuf& text)
+ {
+ return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
}
TString Serialize(const TDatabase& db) {
@@ -271,11 +271,11 @@ namespace NHyperscan {
&rawDb);
TDatabase db(rawDb);
if (status != HS_SUCCESS) {
- if (status == HS_DB_PLATFORM_ERROR) {
- ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
- } else {
- ythrow yexception() << "Failed to deserialize hyperscan database";
- }
+ if (status == HS_DB_PLATFORM_ERROR) {
+ ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
+ } else {
+ ythrow yexception() << "Failed to deserialize hyperscan database";
+ }
}
return db;
}
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
index 1c8f404389..ef50cca08e 100644
--- a/library/cpp/regex/hyperscan/hyperscan.h
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -9,14 +9,14 @@
#include <util/system/cpu_id.h>
namespace NHyperscan {
- using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
- constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
- constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
-
- template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
+ using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
+ constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
+ constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
+
+ template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
class TDeleter {
public:
- template<typename T>
+ template<typename T>
static void Destroy(T* ptr) {
NativeDeleter(ptr);
}
@@ -26,127 +26,127 @@ namespace NHyperscan {
using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
- class TCompileException : public yexception {
+ class TCompileException : public yexception {
};
-
+
namespace NPrivate {
- enum class ERuntime {
- Core2 = 0,
- Corei7 = 1,
- AVX2 = 2,
- AVX512 = 3
- };
-
- ERuntime DetectCurrentRuntime();
-
- TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
-
- hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
-
+ enum class ERuntime {
+ Core2 = 0,
+ Corei7 = 1,
+ AVX2 = 2,
+ AVX512 = 3
+ };
+
+ ERuntime DetectCurrentRuntime();
+
+ TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
+
+ hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
+
struct TImpl {
- hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
-
- hs_error_t (*Scan)(const hs_database_t* db, const char* data,
- unsigned length, unsigned flags, hs_scratch_t* scratch,
- match_event_handler onEvent, void* userCtx);
-
- hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
-
- hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
-
- TImpl() : TImpl(DetectCurrentRuntime()) {}
-
- explicit TImpl(ERuntime runtime);
+ hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
+
+ hs_error_t (*Scan)(const hs_database_t* db, const char* data,
+ unsigned length, unsigned flags, hs_scratch_t* scratch,
+ match_event_handler onEvent, void* userCtx);
+
+ hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
+
+ hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
+
+ TImpl() : TImpl(DetectCurrentRuntime()) {}
+
+ explicit TImpl(ERuntime runtime);
};
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- hs_platform_info_t* platform,
- const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
-
- // We need to parametrize Scan and Matches functions for testing purposes
- template<typename TCallback>
- void Scan(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- TCallback& callback, // applied to index of matched regex
- const TImpl& impl
- ) {
- struct TCallbackWrapper {
- static int EventHandler(
- unsigned int id,
- unsigned long long from,
- unsigned long long to,
- unsigned int flags,
- void* ctx) {
- Y_UNUSED(flags);
- TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
- if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
- return callback2(id, from, to);
- } else {
- callback2(id, from, to);
- return 0;
- }
- }
- };
- unsigned int flags = 0; // unused at present
- hs_error_t status = impl.Scan(
- db.Get(),
- text.begin(),
- text.size(),
- flags,
- scratch.Get(),
- &TCallbackWrapper::EventHandler,
- &callback);
- if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
- ythrow yexception() << "Failed to scan against text: " << text;
- }
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- const TImpl& impl);
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ hs_platform_info_t* platform,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+ // We need to parametrize Scan and Matches functions for testing purposes
+ template<typename TCallback>
+ void Scan(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ TCallback& callback, // applied to index of matched regex
+ const TImpl& impl
+ ) {
+ struct TCallbackWrapper {
+ static int EventHandler(
+ unsigned int id,
+ unsigned long long from,
+ unsigned long long to,
+ unsigned int flags,
+ void* ctx) {
+ Y_UNUSED(flags);
+ TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
+ if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
+ return callback2(id, from, to);
+ } else {
+ callback2(id, from, to);
+ return 0;
+ }
+ }
+ };
+ unsigned int flags = 0; // unused at present
+ hs_error_t status = impl.Scan(
+ db.Get(),
+ text.begin(),
+ text.size(),
+ flags,
+ scratch.Get(),
+ &TCallbackWrapper::EventHandler,
+ &callback);
+ if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
+ ythrow yexception() << "Failed to scan against text: " << text;
+ }
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ const TImpl& impl);
}
TDatabase Compile(const TStringBuf& regex, unsigned int flags);
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
-
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
+
TDatabase CompileMulti(
const TVector<const char*>& regexs,
const TVector<unsigned int>& flags,
const TVector<unsigned int>& ids,
- TCPUFeatures cpuFeatures,
const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ TCPUFeatures cpuFeatures,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
TScratch MakeScratch(const TDatabase& db);
void GrowScratch(TScratch& scratch, const TDatabase& db);
TScratch CloneScratch(const TScratch& scratch);
- template<typename TCallback>
+ template<typename TCallback>
void Scan(
const TDatabase& db,
const TScratch& scratch,
const TStringBuf& text,
TCallback& callback // applied to index of matched regex
) {
- NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
+ NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
}
bool Matches(
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
index 9caa53f2e7..7abbaa4b08 100644
--- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -4,12 +4,12 @@
#include <util/generic/set.h>
-#include <array>
-#include <algorithm>
-
+#include <array>
+#include <algorithm>
+
Y_UNIT_TEST_SUITE(HyperscanWrappers) {
using namespace NHyperscan;
- using namespace NHyperscan::NPrivate;
+ using namespace NHyperscan::NPrivate;
Y_UNIT_TEST(CompileAndScan) {
TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
@@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
scratch1.Reset();
UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));
}
-
- class TSimpleSingleRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
- }
- };
-
- // This regex uses AVX2 instructions on long (>70) texts.
- // It crushes when compiled for machine with AVX2 and run on machine without it.
- class TAvx2SingleRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"
- "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";
- unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
- return NHyperscan::Compile(regex, flags, cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(
- db,
- scratch,
- "_________________________________________________________________"
- "фу.bar"
- "_________________________________________________________________",
- impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(
- db,
- scratch,
- "_________________________________________________________________"
- "фу"
- "_________________________________________________________________",
- impl));
- }
- };
-
- class TSimpleMultiRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- return NHyperscan::CompileMulti(
- {
- "foo",
- "bar",
- },
- {
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
- },
- {
- 42,
- 241,
- },
- cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
-
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
-
- TSet<unsigned int> foundIds;
- auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
- foundIds.insert(id);
- };
- NHyperscan::NPrivate::Scan(
- db,
- scratch,
- "fooBaR",
- callback,
- impl);
- UNIT_ASSERT_EQUAL(foundIds.size(), 2);
- UNIT_ASSERT(foundIds.contains(42));
- UNIT_ASSERT(foundIds.contains(241));
- }
- };
-
- template <class Regex>
- void TestCrossPlatformCompile() {
- const std::array<ERuntime, 4> runtimes = {
- ERuntime::Core2,
- ERuntime::Corei7,
- ERuntime::AVX2,
- ERuntime::AVX512
- };
-
- // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.
- auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());
- Y_ASSERT(currentRuntimeIter != runtimes.cend());
-
- for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {
- auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));
- Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});
- }
- }
-
- Y_UNIT_TEST(CrossPlatformCompile) {
- TestCrossPlatformCompile<TSimpleSingleRegex>();
- TestCrossPlatformCompile<TAvx2SingleRegex>();
- TestCrossPlatformCompile<TSimpleMultiRegex>();
- }
+
+ class TSimpleSingleRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+ }
+ };
+
+ // This regex uses AVX2 instructions on long (>70) texts.
+ // It crashes when compiled for a machine with AVX2 and run on a machine without it.
+ class TAvx2SingleRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"
+ "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";
+ unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
+ return NHyperscan::Compile(regex, flags, cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(
+ db,
+ scratch,
+ "_________________________________________________________________"
+ "фу.bar"
+ "_________________________________________________________________",
+ impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(
+ db,
+ scratch,
+ "_________________________________________________________________"
+ "фу"
+ "_________________________________________________________________",
+ impl));
+ }
+ };
+
+ class TSimpleMultiRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ return NHyperscan::CompileMulti(
+ {
+ "foo",
+ "bar",
+ },
+ {
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
+ },
+ {
+ 42,
+ 241,
+ },
+ cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+
+ TSet<unsigned int> foundIds;
+ auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+ foundIds.insert(id);
+ };
+ NHyperscan::NPrivate::Scan(
+ db,
+ scratch,
+ "fooBaR",
+ callback,
+ impl);
+ UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+ UNIT_ASSERT(foundIds.contains(42));
+ UNIT_ASSERT(foundIds.contains(241));
+ }
+ };
+
+ template <class Regex>
+ void TestCrossPlatformCompile() {
+ const std::array<ERuntime, 4> runtimes = {
+ ERuntime::Core2,
+ ERuntime::Corei7,
+ ERuntime::AVX2,
+ ERuntime::AVX512
+ };
+
+ // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.
+ auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());
+ Y_ASSERT(currentRuntimeIter != runtimes.cend());
+
+ for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {
+ auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));
+ Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});
+ }
+ }
+
+ Y_UNIT_TEST(CrossPlatformCompile) {
+ TestCrossPlatformCompile<TSimpleSingleRegex>();
+ TestCrossPlatformCompile<TAvx2SingleRegex>();
+ TestCrossPlatformCompile<TSimpleMultiRegex>();
+ }
}
diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make
index e99130ae18..e58d93502c 100644
--- a/library/cpp/regex/hyperscan/ya.make
+++ b/library/cpp/regex/hyperscan/ya.make
@@ -15,5 +15,5 @@ SRCS(
)
END()
-
-RECURSE_FOR_TESTS(ut)
+
+RECURSE_FOR_TESTS(ut)