about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex
diff options
context:
space:
mode:
author: jakovenko-dm <jakovenko-dm@yandex-team.ru>, 2022-02-10 16:48:06 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru>, 2022-02-10 16:48:06 +0300
commit: 7077baee21e33a3ad2e790527b1c50b22c244db3 (patch)
tree: e719eb81a7dbb542f49340ad8c36c65d58ac42f6 /library/cpp/regex
parent: 4282ec504ababea092138c3af45d5399d01c194a (diff)
download: ydb-7077baee21e33a3ad2e790527b1c50b22c244db3.tar.gz
Restoring authorship annotation for <jakovenko-dm@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- library/cpp/regex/hyperscan/hyperscan.cpp 352
-rw-r--r-- library/cpp/regex/hyperscan/hyperscan.h 200
-rw-r--r-- library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp 224
-rw-r--r-- library/cpp/regex/hyperscan/ya.make 4
4 files changed, 390 insertions, 390 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
index ba321f9c29..82ca3880d1 100644
--- a/library/cpp/regex/hyperscan/hyperscan.cpp
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -17,201 +17,201 @@ namespace NHyperscan {
using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
namespace NPrivate {
- ERuntime DetectCurrentRuntime() {
+ ERuntime DetectCurrentRuntime() {
if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) {
- return ERuntime::AVX512;
+ return ERuntime::AVX512;
} else if (NX86::HaveAVX() && NX86::HaveAVX2()) {
- return ERuntime::AVX2;
+ return ERuntime::AVX2;
} else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) {
- return ERuntime::Corei7;
+ return ERuntime::Corei7;
} else {
- return ERuntime::Core2;
+ return ERuntime::Core2;
}
}
- TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
- switch (runtime) {
- default:
- Y_ASSERT(false);
+ TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
+ switch (runtime) {
+ default:
+ Y_ASSERT(false);
[[fallthrough]];
- case ERuntime::Core2:
- case ERuntime::Corei7:
- return 0;
- case ERuntime::AVX2:
- return CPU_FEATURES_AVX2;
- case ERuntime::AVX512:
- return CPU_FEATURES_AVX512;
- }
+ case ERuntime::Core2:
+ case ERuntime::Corei7:
+ return 0;
+ case ERuntime::AVX2:
+ return CPU_FEATURES_AVX2;
+ case ERuntime::AVX512:
+ return CPU_FEATURES_AVX512;
+ }
}
- hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
- hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
- return platformInfo;
+ hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
+ hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
+ return platformInfo;
}
-
+
hs_platform_info_t MakeCurrentPlatformInfo() {
return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
}
- TImpl::TImpl(ERuntime runtime) {
- switch (runtime) {
- default:
- Y_ASSERT(false);
+ TImpl::TImpl(ERuntime runtime) {
+ switch (runtime) {
+ default:
+ Y_ASSERT(false);
[[fallthrough]];
- case ERuntime::Core2:
- AllocScratch = core2_hs_alloc_scratch;
- Scan = core2_hs_scan;
- SerializeDatabase = core2_hs_serialize_database;
- DeserializeDatabase = core2_hs_deserialize_database;
- break;
- case ERuntime::Corei7:
- AllocScratch = corei7_hs_alloc_scratch;
- Scan = corei7_hs_scan;
- SerializeDatabase = corei7_hs_serialize_database;
- DeserializeDatabase = corei7_hs_deserialize_database;
- break;
- case ERuntime::AVX2:
- AllocScratch = avx2_hs_alloc_scratch;
- Scan = avx2_hs_scan;
- SerializeDatabase = avx2_hs_serialize_database;
- DeserializeDatabase = avx2_hs_deserialize_database;
- break;
- case ERuntime::AVX512:
- AllocScratch = avx512_hs_alloc_scratch;
- Scan = avx512_hs_scan;
- SerializeDatabase = avx512_hs_serialize_database;
- DeserializeDatabase = avx512_hs_deserialize_database;
- }
+ case ERuntime::Core2:
+ AllocScratch = core2_hs_alloc_scratch;
+ Scan = core2_hs_scan;
+ SerializeDatabase = core2_hs_serialize_database;
+ DeserializeDatabase = core2_hs_deserialize_database;
+ break;
+ case ERuntime::Corei7:
+ AllocScratch = corei7_hs_alloc_scratch;
+ Scan = corei7_hs_scan;
+ SerializeDatabase = corei7_hs_serialize_database;
+ DeserializeDatabase = corei7_hs_deserialize_database;
+ break;
+ case ERuntime::AVX2:
+ AllocScratch = avx2_hs_alloc_scratch;
+ Scan = avx2_hs_scan;
+ SerializeDatabase = avx2_hs_serialize_database;
+ DeserializeDatabase = avx2_hs_deserialize_database;
+ break;
+ case ERuntime::AVX512:
+ AllocScratch = avx512_hs_alloc_scratch;
+ Scan = avx512_hs_scan;
+ SerializeDatabase = avx512_hs_serialize_database;
+ DeserializeDatabase = avx512_hs_deserialize_database;
+ }
}
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
- hs_database_t* rawDb = nullptr;
- hs_compile_error_t* rawCompileErr = nullptr;
- hs_error_t status = hs_compile(
- regex.begin(),
- flags,
- HS_MODE_BLOCK,
- platform,
- &rawDb,
- &rawCompileErr);
- TDatabase db(rawDb);
- NHyperscan::TCompileError compileError(rawCompileErr);
- if (status != HS_SUCCESS) {
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
+ hs_database_t* rawDb = nullptr;
+ hs_compile_error_t* rawCompileErr = nullptr;
+ hs_error_t status = hs_compile(
+ regex.begin(),
+ flags,
+ HS_MODE_BLOCK,
+ platform,
+ &rawDb,
+ &rawCompileErr);
+ TDatabase db(rawDb);
+ NHyperscan::TCompileError compileError(rawCompileErr);
+ if (status != HS_SUCCESS) {
ythrow TCompileException()
- << "Failed to compile regex: " << regex << ". "
- << "Error message (hyperscan): " << compileError->message;
- }
- return db;
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- hs_platform_info_t* platform,
- const TVector<const hs_expr_ext_t*>* extendedParameters) {
- unsigned int count = regexs.size();
- if (flags.size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(flags) = " << flags.size() << ".";
- }
- if (ids.size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(ids) = " << ids.size() << ".";
- }
- if (extendedParameters && extendedParameters->size() != count) {
- ythrow yexception()
- << "Mismatch of sizes vectors passed to CompileMulti. "
- << "size(regexs) = " << regexs.size() << ". "
- << "size(extendedParameters) = " << extendedParameters->size() << ".";
+ << "Failed to compile regex: " << regex << ". "
+ << "Error message (hyperscan): " << compileError->message;
}
- hs_database_t* rawDb = nullptr;
- hs_compile_error_t* rawCompileErr = nullptr;
- hs_error_t status = hs_compile_ext_multi(
- regexs.data(),
- flags.data(),
- ids.data(),
- extendedParameters ? extendedParameters->data() : nullptr,
- count,
- HS_MODE_BLOCK,
- platform,
- &rawDb,
- &rawCompileErr);
- TDatabase db(rawDb);
- NHyperscan::TCompileError compileError(rawCompileErr);
- if (status != HS_SUCCESS) {
- if (compileError->expression >= 0) {
- const char* regex = regexs[compileError->expression];
- ythrow TCompileException()
- << "Failed to compile regex: " << regex << ". "
- << "Error message (hyperscan): " << compileError->message;
- } else {
- ythrow TCompileException()
- << "Failed to compile multiple regexs. "
- << "Error message (hyperscan): " << compileError->message;
- }
- }
- return db;
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- const TImpl& impl) {
- bool result = false;
- auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
- result = true;
- return 1; // stop scan
- };
- Scan(
- db,
- scratch,
- text,
- callback,
- impl);
- return result;
+ return db;
}
- } // namespace NPrivate
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ hs_platform_info_t* platform,
+ const TVector<const hs_expr_ext_t*>* extendedParameters) {
+ unsigned int count = regexs.size();
+ if (flags.size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(flags) = " << flags.size() << ".";
+ }
+ if (ids.size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(ids) = " << ids.size() << ".";
+ }
+ if (extendedParameters && extendedParameters->size() != count) {
+ ythrow yexception()
+ << "Mismatch of sizes vectors passed to CompileMulti. "
+ << "size(regexs) = " << regexs.size() << ". "
+ << "size(extendedParameters) = " << extendedParameters->size() << ".";
+ }
+ hs_database_t* rawDb = nullptr;
+ hs_compile_error_t* rawCompileErr = nullptr;
+ hs_error_t status = hs_compile_ext_multi(
+ regexs.data(),
+ flags.data(),
+ ids.data(),
+ extendedParameters ? extendedParameters->data() : nullptr,
+ count,
+ HS_MODE_BLOCK,
+ platform,
+ &rawDb,
+ &rawCompileErr);
+ TDatabase db(rawDb);
+ NHyperscan::TCompileError compileError(rawCompileErr);
+ if (status != HS_SUCCESS) {
+ if (compileError->expression >= 0) {
+ const char* regex = regexs[compileError->expression];
+ ythrow TCompileException()
+ << "Failed to compile regex: " << regex << ". "
+ << "Error message (hyperscan): " << compileError->message;
+ } else {
+ ythrow TCompileException()
+ << "Failed to compile multiple regexs. "
+ << "Error message (hyperscan): " << compileError->message;
+ }
+ }
+ return db;
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ const TImpl& impl) {
+ bool result = false;
+ auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
+ result = true;
+ return 1; // stop scan
+ };
+ Scan(
+ db,
+ scratch,
+ text,
+ callback,
+ impl);
+ return result;
+ }
+ } // namespace NPrivate
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
return NPrivate::Compile(regex, flags, &platformInfo);
}
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
- auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
- return NPrivate::Compile(regex, flags, &platformInfo);
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- const TVector<const hs_expr_ext_t*>* extendedParameters)
- {
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
+ auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+ return NPrivate::Compile(regex, flags, &platformInfo);
+ }
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ const TVector<const hs_expr_ext_t*>* extendedParameters)
+ {
auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
- }
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- TCPUFeatures cpuFeatures,
- const TVector<const hs_expr_ext_t*>* extendedParameters)
- {
- auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
- return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
- }
-
+ }
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ TCPUFeatures cpuFeatures,
+ const TVector<const hs_expr_ext_t*>* extendedParameters)
+ {
+ auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+ return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
+ }
+
TScratch MakeScratch(const TDatabase& db) {
hs_scratch_t* rawScratch = nullptr;
- hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+ hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
NHyperscan::TScratch scratch(rawScratch);
if (status != HS_SUCCESS) {
ythrow yexception() << "Failed to make scratch for hyperscan database";
@@ -221,7 +221,7 @@ namespace NHyperscan {
void GrowScratch(TScratch& scratch, const TDatabase& db) {
hs_scratch_t* rawScratch = scratch.Get();
- hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+ hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
if (rawScratch != scratch.Get()) {
Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
scratch.Reset(rawScratch);
@@ -244,9 +244,9 @@ namespace NHyperscan {
bool Matches(
const TDatabase& db,
const TScratch& scratch,
- const TStringBuf& text)
- {
- return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
+ const TStringBuf& text)
+ {
+ return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
}
TString Serialize(const TDatabase& db) {
@@ -271,11 +271,11 @@ namespace NHyperscan {
&rawDb);
TDatabase db(rawDb);
if (status != HS_SUCCESS) {
- if (status == HS_DB_PLATFORM_ERROR) {
- ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
- } else {
- ythrow yexception() << "Failed to deserialize hyperscan database";
- }
+ if (status == HS_DB_PLATFORM_ERROR) {
+ ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
+ } else {
+ ythrow yexception() << "Failed to deserialize hyperscan database";
+ }
}
return db;
}
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
index 1c8f404389..ef50cca08e 100644
--- a/library/cpp/regex/hyperscan/hyperscan.h
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -9,14 +9,14 @@
#include <util/system/cpu_id.h>
namespace NHyperscan {
- using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
- constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
- constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
-
- template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
+ using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
+ constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
+ constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
+
+ template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
class TDeleter {
public:
- template<typename T>
+ template<typename T>
static void Destroy(T* ptr) {
NativeDeleter(ptr);
}
@@ -26,127 +26,127 @@ namespace NHyperscan {
using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
- class TCompileException : public yexception {
+ class TCompileException : public yexception {
};
-
+
namespace NPrivate {
- enum class ERuntime {
- Core2 = 0,
- Corei7 = 1,
- AVX2 = 2,
- AVX512 = 3
- };
-
- ERuntime DetectCurrentRuntime();
-
- TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
-
- hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
-
+ enum class ERuntime {
+ Core2 = 0,
+ Corei7 = 1,
+ AVX2 = 2,
+ AVX512 = 3
+ };
+
+ ERuntime DetectCurrentRuntime();
+
+ TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
+
+ hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
+
struct TImpl {
- hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
-
- hs_error_t (*Scan)(const hs_database_t* db, const char* data,
- unsigned length, unsigned flags, hs_scratch_t* scratch,
- match_event_handler onEvent, void* userCtx);
-
- hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
-
- hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
-
- TImpl() : TImpl(DetectCurrentRuntime()) {}
-
- explicit TImpl(ERuntime runtime);
+ hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
+
+ hs_error_t (*Scan)(const hs_database_t* db, const char* data,
+ unsigned length, unsigned flags, hs_scratch_t* scratch,
+ match_event_handler onEvent, void* userCtx);
+
+ hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
+
+ hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
+
+ TImpl() : TImpl(DetectCurrentRuntime()) {}
+
+ explicit TImpl(ERuntime runtime);
};
-
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- hs_platform_info_t* platform,
- const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
-
- // We need to parametrize Scan and Matches functions for testing purposes
- template<typename TCallback>
- void Scan(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- TCallback& callback, // applied to index of matched regex
- const TImpl& impl
- ) {
- struct TCallbackWrapper {
- static int EventHandler(
- unsigned int id,
- unsigned long long from,
- unsigned long long to,
- unsigned int flags,
- void* ctx) {
- Y_UNUSED(flags);
- TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
- if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
- return callback2(id, from, to);
- } else {
- callback2(id, from, to);
- return 0;
- }
- }
- };
- unsigned int flags = 0; // unused at present
- hs_error_t status = impl.Scan(
- db.Get(),
- text.begin(),
- text.size(),
- flags,
- scratch.Get(),
- &TCallbackWrapper::EventHandler,
- &callback);
- if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
- ythrow yexception() << "Failed to scan against text: " << text;
- }
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
- const TImpl& impl);
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ hs_platform_info_t* platform,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+ // We need to parametrize Scan and Matches functions for testing purposes
+ template<typename TCallback>
+ void Scan(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ TCallback& callback, // applied to index of matched regex
+ const TImpl& impl
+ ) {
+ struct TCallbackWrapper {
+ static int EventHandler(
+ unsigned int id,
+ unsigned long long from,
+ unsigned long long to,
+ unsigned int flags,
+ void* ctx) {
+ Y_UNUSED(flags);
+ TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
+ if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
+ return callback2(id, from, to);
+ } else {
+ callback2(id, from, to);
+ return 0;
+ }
+ }
+ };
+ unsigned int flags = 0; // unused at present
+ hs_error_t status = impl.Scan(
+ db.Get(),
+ text.begin(),
+ text.size(),
+ flags,
+ scratch.Get(),
+ &TCallbackWrapper::EventHandler,
+ &callback);
+ if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
+ ythrow yexception() << "Failed to scan against text: " << text;
+ }
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ const TImpl& impl);
}
TDatabase Compile(const TStringBuf& regex, unsigned int flags);
- TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
-
- TDatabase CompileMulti(
- const TVector<const char*>& regexs,
- const TVector<unsigned int>& flags,
- const TVector<unsigned int>& ids,
- const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
-
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
+
TDatabase CompileMulti(
const TVector<const char*>& regexs,
const TVector<unsigned int>& flags,
const TVector<unsigned int>& ids,
- TCPUFeatures cpuFeatures,
const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ TCPUFeatures cpuFeatures,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
TScratch MakeScratch(const TDatabase& db);
void GrowScratch(TScratch& scratch, const TDatabase& db);
TScratch CloneScratch(const TScratch& scratch);
- template<typename TCallback>
+ template<typename TCallback>
void Scan(
const TDatabase& db,
const TScratch& scratch,
const TStringBuf& text,
TCallback& callback // applied to index of matched regex
) {
- NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
+ NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
}
bool Matches(
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
index 9caa53f2e7..7abbaa4b08 100644
--- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -4,12 +4,12 @@
#include <util/generic/set.h>
-#include <array>
-#include <algorithm>
-
+#include <array>
+#include <algorithm>
+
Y_UNIT_TEST_SUITE(HyperscanWrappers) {
using namespace NHyperscan;
- using namespace NHyperscan::NPrivate;
+ using namespace NHyperscan::NPrivate;
Y_UNIT_TEST(CompileAndScan) {
TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
@@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
scratch1.Reset();
UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));
}
-
- class TSimpleSingleRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
- }
- };
-
- // This regex uses AVX2 instructions on long (>70) texts.
- // It crashes when compiled for a machine with AVX2 and run on a machine without it.
- class TAvx2SingleRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"
- "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";
- unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
- return NHyperscan::Compile(regex, flags, cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(
- db,
- scratch,
- "_________________________________________________________________"
- "фу.bar"
- "_________________________________________________________________",
- impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(
- db,
- scratch,
- "_________________________________________________________________"
- "фу"
- "_________________________________________________________________",
- impl));
- }
- };
-
- class TSimpleMultiRegex {
- public:
- static TDatabase Compile(TCPUFeatures cpuFeatures) {
- return NHyperscan::CompileMulti(
- {
- "foo",
- "bar",
- },
- {
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
- },
- {
- 42,
- 241,
- },
- cpuFeatures);
- }
- static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
-
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));
- UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));
- UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
-
- TSet<unsigned int> foundIds;
- auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
- foundIds.insert(id);
- };
- NHyperscan::NPrivate::Scan(
- db,
- scratch,
- "fooBaR",
- callback,
- impl);
- UNIT_ASSERT_EQUAL(foundIds.size(), 2);
- UNIT_ASSERT(foundIds.contains(42));
- UNIT_ASSERT(foundIds.contains(241));
- }
- };
-
- template <class Regex>
- void TestCrossPlatformCompile() {
- const std::array<ERuntime, 4> runtimes = {
- ERuntime::Core2,
- ERuntime::Corei7,
- ERuntime::AVX2,
- ERuntime::AVX512
- };
-
- // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.
- auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());
- Y_ASSERT(currentRuntimeIter != runtimes.cend());
-
- for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {
- auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));
- Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});
- }
- }
-
- Y_UNIT_TEST(CrossPlatformCompile) {
- TestCrossPlatformCompile<TSimpleSingleRegex>();
- TestCrossPlatformCompile<TAvx2SingleRegex>();
- TestCrossPlatformCompile<TSimpleMultiRegex>();
- }
+
+ class TSimpleSingleRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+ }
+ };
+
+ // This regex uses AVX2 instructions on long (>70) texts.
+ // It crashes when compiled for a machine with AVX2 and run on a machine without it.
+ class TAvx2SingleRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"
+ "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";
+ unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
+ return NHyperscan::Compile(regex, flags, cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(
+ db,
+ scratch,
+ "_________________________________________________________________"
+ "фу.bar"
+ "_________________________________________________________________",
+ impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(
+ db,
+ scratch,
+ "_________________________________________________________________"
+ "фу"
+ "_________________________________________________________________",
+ impl));
+ }
+ };
+
+ class TSimpleMultiRegex {
+ public:
+ static TDatabase Compile(TCPUFeatures cpuFeatures) {
+ return NHyperscan::CompileMulti(
+ {
+ "foo",
+ "bar",
+ },
+ {
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
+ },
+ {
+ 42,
+ 241,
+ },
+ cpuFeatures);
+ }
+ static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));
+ UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));
+ UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+
+ TSet<unsigned int> foundIds;
+ auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+ foundIds.insert(id);
+ };
+ NHyperscan::NPrivate::Scan(
+ db,
+ scratch,
+ "fooBaR",
+ callback,
+ impl);
+ UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+ UNIT_ASSERT(foundIds.contains(42));
+ UNIT_ASSERT(foundIds.contains(241));
+ }
+ };
+
+ template <class Regex>
+ void TestCrossPlatformCompile() {
+ const std::array<ERuntime, 4> runtimes = {
+ ERuntime::Core2,
+ ERuntime::Corei7,
+ ERuntime::AVX2,
+ ERuntime::AVX512
+ };
+
+ // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.
+ auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());
+ Y_ASSERT(currentRuntimeIter != runtimes.cend());
+
+ for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {
+ auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));
+ Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});
+ }
+ }
+
+ Y_UNIT_TEST(CrossPlatformCompile) {
+ TestCrossPlatformCompile<TSimpleSingleRegex>();
+ TestCrossPlatformCompile<TAvx2SingleRegex>();
+ TestCrossPlatformCompile<TSimpleMultiRegex>();
+ }
}
diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make
index e99130ae18..e58d93502c 100644
--- a/library/cpp/regex/hyperscan/ya.make
+++ b/library/cpp/regex/hyperscan/ya.make
@@ -15,5 +15,5 @@ SRCS(
)
END()
-
-RECURSE_FOR_TESTS(ut)
+
+RECURSE_FOR_TESTS(ut)