diff options
author | jakovenko-dm <jakovenko-dm@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
commit | 7077baee21e33a3ad2e790527b1c50b22c244db3 (patch) | |
tree | e719eb81a7dbb542f49340ad8c36c65d58ac42f6 /library/cpp/regex | |
parent | 4282ec504ababea092138c3af45d5399d01c194a (diff) | |
download | ydb-7077baee21e33a3ad2e790527b1c50b22c244db3.tar.gz |
Restoring authorship annotation for <jakovenko-dm@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex')
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 352 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 200 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 224 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 4 |
4 files changed, 390 insertions, 390 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index ba321f9c29..82ca3880d1 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -17,201 +17,201 @@ namespace NHyperscan { using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>; namespace NPrivate { - ERuntime DetectCurrentRuntime() { + ERuntime DetectCurrentRuntime() { if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) { - return ERuntime::AVX512; + return ERuntime::AVX512; } else if (NX86::HaveAVX() && NX86::HaveAVX2()) { - return ERuntime::AVX2; + return ERuntime::AVX2; } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) { - return ERuntime::Corei7; + return ERuntime::Corei7; } else { - return ERuntime::Core2; + return ERuntime::Core2; } } - TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) { - switch (runtime) { - default: - Y_ASSERT(false); + TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) { + switch (runtime) { + default: + Y_ASSERT(false); [[fallthrough]]; - case ERuntime::Core2: - case ERuntime::Corei7: - return 0; - case ERuntime::AVX2: - return CPU_FEATURES_AVX2; - case ERuntime::AVX512: - return CPU_FEATURES_AVX512; - } + case ERuntime::Core2: + case ERuntime::Corei7: + return 0; + case ERuntime::AVX2: + return CPU_FEATURES_AVX2; + case ERuntime::AVX512: + return CPU_FEATURES_AVX512; + } } - hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { - hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; - return platformInfo; + hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { + hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; + return platformInfo; } - + hs_platform_info_t MakeCurrentPlatformInfo() { return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime())); } - TImpl::TImpl(ERuntime runtime) { - switch (runtime) { - default: - Y_ASSERT(false); + TImpl::TImpl(ERuntime runtime) { + switch (runtime) { + default: + Y_ASSERT(false); [[fallthrough]]; - case ERuntime::Core2: - AllocScratch = core2_hs_alloc_scratch; - Scan = core2_hs_scan; - SerializeDatabase = core2_hs_serialize_database; - DeserializeDatabase = core2_hs_deserialize_database; - break; - case ERuntime::Corei7: - AllocScratch = corei7_hs_alloc_scratch; - Scan = corei7_hs_scan; - SerializeDatabase = corei7_hs_serialize_database; - DeserializeDatabase = corei7_hs_deserialize_database; - break; - case ERuntime::AVX2: - AllocScratch = avx2_hs_alloc_scratch; - Scan = avx2_hs_scan; - SerializeDatabase = avx2_hs_serialize_database; - DeserializeDatabase = avx2_hs_deserialize_database; - break; - case ERuntime::AVX512: - AllocScratch = avx512_hs_alloc_scratch; - Scan = avx512_hs_scan; - SerializeDatabase = avx512_hs_serialize_database; - DeserializeDatabase = avx512_hs_deserialize_database; - } + case ERuntime::Core2: + AllocScratch = core2_hs_alloc_scratch; + Scan = core2_hs_scan; + SerializeDatabase = core2_hs_serialize_database; + DeserializeDatabase = core2_hs_deserialize_database; + break; + case ERuntime::Corei7: + AllocScratch = corei7_hs_alloc_scratch; + Scan = corei7_hs_scan; + SerializeDatabase = corei7_hs_serialize_database; + DeserializeDatabase = corei7_hs_deserialize_database; + break; + case ERuntime::AVX2: + AllocScratch = avx2_hs_alloc_scratch; + Scan = avx2_hs_scan; + SerializeDatabase = avx2_hs_serialize_database; + DeserializeDatabase = avx2_hs_deserialize_database; + break; + case ERuntime::AVX512: + AllocScratch = avx512_hs_alloc_scratch; + Scan = avx512_hs_scan; + SerializeDatabase = avx512_hs_serialize_database; + DeserializeDatabase = avx512_hs_deserialize_database; + } } - - TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { - hs_database_t* rawDb = nullptr; - hs_compile_error_t* rawCompileErr = nullptr; - hs_error_t status = hs_compile( - regex.begin(), - flags, - HS_MODE_BLOCK, - platform, - &rawDb, - &rawCompileErr); - TDatabase db(rawDb); - NHyperscan::TCompileError compileError(rawCompileErr); - if (status != HS_SUCCESS) { + + TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { + hs_database_t* rawDb = nullptr; + hs_compile_error_t* rawCompileErr = nullptr; + hs_error_t status = hs_compile( + regex.begin(), + flags, + HS_MODE_BLOCK, + platform, + &rawDb, + &rawCompileErr); + TDatabase db(rawDb); + NHyperscan::TCompileError compileError(rawCompileErr); + if (status != HS_SUCCESS) { ythrow TCompileException() - << "Failed to compile regex: " << regex << ". " - << "Error message (hyperscan): " << compileError->message; - } - return db; - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - hs_platform_info_t* platform, - const TVector<const hs_expr_ext_t*>* extendedParameters) { - unsigned int count = regexs.size(); - if (flags.size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(flags) = " << flags.size() << "."; - } - if (ids.size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(ids) = " << ids.size() << "."; - } - if (extendedParameters && extendedParameters->size() != count) { - ythrow yexception() - << "Mismatch of sizes vectors passed to CompileMulti. " - << "size(regexs) = " << regexs.size() << ". " - << "size(extendedParameters) = " << extendedParameters->size() << "."; + << "Failed to compile regex: " << regex << ". " + << "Error message (hyperscan): " << compileError->message; } - hs_database_t* rawDb = nullptr; - hs_compile_error_t* rawCompileErr = nullptr; - hs_error_t status = hs_compile_ext_multi( - regexs.data(), - flags.data(), - ids.data(), - extendedParameters ? extendedParameters->data() : nullptr, - count, - HS_MODE_BLOCK, - platform, - &rawDb, - &rawCompileErr); - TDatabase db(rawDb); - NHyperscan::TCompileError compileError(rawCompileErr); - if (status != HS_SUCCESS) { - if (compileError->expression >= 0) { - const char* regex = regexs[compileError->expression]; - ythrow TCompileException() - << "Failed to compile regex: " << regex << ". " - << "Error message (hyperscan): " << compileError->message; - } else { - ythrow TCompileException() - << "Failed to compile multiple regexs. " - << "Error message (hyperscan): " << compileError->message; - } - } - return db; - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - const TImpl& impl) { - bool result = false; - auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) { - result = true; - return 1; // stop scan - }; - Scan( - db, - scratch, - text, - callback, - impl); - return result; + return db; } - } // namespace NPrivate - - TDatabase Compile(const TStringBuf& regex, unsigned int flags) { + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + hs_platform_info_t* platform, + const TVector<const hs_expr_ext_t*>* extendedParameters) { + unsigned int count = regexs.size(); + if (flags.size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(flags) = " << flags.size() << "."; + } + if (ids.size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(ids) = " << ids.size() << "."; + } + if (extendedParameters && extendedParameters->size() != count) { + ythrow yexception() + << "Mismatch of sizes vectors passed to CompileMulti. " + << "size(regexs) = " << regexs.size() << ". " + << "size(extendedParameters) = " << extendedParameters->size() << "."; + } + hs_database_t* rawDb = nullptr; + hs_compile_error_t* rawCompileErr = nullptr; + hs_error_t status = hs_compile_ext_multi( + regexs.data(), + flags.data(), + ids.data(), + extendedParameters ? extendedParameters->data() : nullptr, + count, + HS_MODE_BLOCK, + platform, + &rawDb, + &rawCompileErr); + TDatabase db(rawDb); + NHyperscan::TCompileError compileError(rawCompileErr); + if (status != HS_SUCCESS) { + if (compileError->expression >= 0) { + const char* regex = regexs[compileError->expression]; + ythrow TCompileException() + << "Failed to compile regex: " << regex << ". " + << "Error message (hyperscan): " << compileError->message; + } else { + ythrow TCompileException() + << "Failed to compile multiple regexs. " + << "Error message (hyperscan): " << compileError->message; + } + } + return db; + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + const TImpl& impl) { + bool result = false; + auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) { + result = true; + return 1; // stop scan + }; + Scan( + db, + scratch, + text, + callback, + impl); + return result; + } + } // namespace NPrivate + + TDatabase Compile(const TStringBuf& regex, unsigned int flags) { auto platformInfo = NPrivate::MakeCurrentPlatformInfo(); return NPrivate::Compile(regex, flags, &platformInfo); } - TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { - auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); - return NPrivate::Compile(regex, flags, &platformInfo); - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - const TVector<const hs_expr_ext_t*>* extendedParameters) - { + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { + auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); + return NPrivate::Compile(regex, flags, &platformInfo); + } + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + const TVector<const hs_expr_ext_t*>* extendedParameters) + { auto platformInfo = NPrivate::MakeCurrentPlatformInfo(); return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); - } - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - TCPUFeatures cpuFeatures, - const TVector<const hs_expr_ext_t*>* extendedParameters) - { - auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); - return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); - } - + } + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + TCPUFeatures cpuFeatures, + const TVector<const hs_expr_ext_t*>* extendedParameters) + { + auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); + return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); + } + TScratch MakeScratch(const TDatabase& db) { hs_scratch_t* rawScratch = nullptr; - hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); + hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); NHyperscan::TScratch scratch(rawScratch); if (status != HS_SUCCESS) { ythrow yexception() << "Failed to make scratch for hyperscan database"; @@ -221,7 +221,7 @@ namespace NHyperscan { void GrowScratch(TScratch& scratch, const TDatabase& db) { hs_scratch_t* rawScratch = scratch.Get(); - hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); + hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); if (rawScratch != scratch.Get()) { Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch scratch.Reset(rawScratch); @@ -244,9 +244,9 @@ namespace NHyperscan { bool Matches( const TDatabase& db, const TScratch& scratch, - const TStringBuf& text) - { - return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); + const TStringBuf& text) + { + return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); } TString Serialize(const TDatabase& db) { @@ -271,11 +271,11 @@ namespace NHyperscan { &rawDb); TDatabase db(rawDb); if (status != HS_SUCCESS) { - if (status == HS_DB_PLATFORM_ERROR) { - ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; - } else { - ythrow yexception() << "Failed to deserialize hyperscan database"; - } + if (status == HS_DB_PLATFORM_ERROR) { + ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; + } else { + ythrow yexception() << "Failed to deserialize hyperscan database"; + } } return db; } diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 1c8f404389..ef50cca08e 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -9,14 +9,14 @@ #include <util/system/cpu_id.h> namespace NHyperscan { - using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); - constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; - constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; - - template<typename TNativeDeleter, TNativeDeleter NativeDeleter> + using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); + constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; + constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; + + template<typename TNativeDeleter, TNativeDeleter NativeDeleter> class TDeleter { public: - template<typename T> + template<typename T> static void Destroy(T* ptr) { NativeDeleter(ptr); } @@ -26,127 +26,127 @@ namespace NHyperscan { using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; - class TCompileException : public yexception { + class TCompileException : public yexception { }; - + namespace NPrivate { - enum class ERuntime { - Core2 = 0, - Corei7 = 1, - AVX2 = 2, - AVX512 = 3 - }; - - ERuntime DetectCurrentRuntime(); - - TCPUFeatures RuntimeCpuFeatures(ERuntime runtime); - - hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures); - + enum class ERuntime { + Core2 = 0, + Corei7 = 1, + AVX2 = 2, + AVX512 = 3 + }; + + ERuntime DetectCurrentRuntime(); + + TCPUFeatures RuntimeCpuFeatures(ERuntime runtime); + + hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures); + struct TImpl { - hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch); - - hs_error_t (*Scan)(const hs_database_t* db, const char* data, - unsigned length, unsigned flags, hs_scratch_t* scratch, - match_event_handler onEvent, void* userCtx); - - hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length); - - hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info); - - TImpl() : TImpl(DetectCurrentRuntime()) {} - - explicit TImpl(ERuntime runtime); + hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch); + + hs_error_t (*Scan)(const hs_database_t* db, const char* data, + unsigned length, unsigned flags, hs_scratch_t* scratch, + match_event_handler onEvent, void* userCtx); + + hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length); + + hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info); + + TImpl() : TImpl(DetectCurrentRuntime()) {} + + explicit TImpl(ERuntime runtime); }; - - TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform); - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - hs_platform_info_t* platform, - const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - - // We need to parametrize Scan and Matches functions for testing purposes - template<typename TCallback> - void Scan( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - TCallback& callback, // applied to index of matched regex - const TImpl& impl - ) { - struct TCallbackWrapper { - static int EventHandler( - unsigned int id, - unsigned long long from, - unsigned long long to, - unsigned int flags, - void* ctx) { - Y_UNUSED(flags); - TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx); - if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) { - return callback2(id, from, to); - } else { - callback2(id, from, to); - return 0; - } - } - }; - unsigned int flags = 0; // unused at present - hs_error_t status = impl.Scan( - db.Get(), - text.begin(), - text.size(), - flags, - scratch.Get(), - &TCallbackWrapper::EventHandler, - &callback); - if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) { - ythrow yexception() << "Failed to scan against text: " << text; - } - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, - const TImpl& impl); + + TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform); + + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + hs_platform_info_t* platform, + const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + + // We need to parametrize Scan and Matches functions for testing purposes + template<typename TCallback> + void Scan( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + TCallback& callback, // applied to index of matched regex + const TImpl& impl + ) { + struct TCallbackWrapper { + static int EventHandler( + unsigned int id, + unsigned long long from, + unsigned long long to, + unsigned int flags, + void* ctx) { + Y_UNUSED(flags); + TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx); + if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) { + return callback2(id, from, to); + } else { + callback2(id, from, to); + return 0; + } + } + }; + unsigned int flags = 0; // unused at present + hs_error_t status = impl.Scan( + db.Get(), + text.begin(), + text.size(), + flags, + scratch.Get(), + &TCallbackWrapper::EventHandler, + &callback); + if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) { + ythrow yexception() << "Failed to scan against text: " << text; + } + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, + const TImpl& impl); } TDatabase Compile(const TStringBuf& regex, unsigned int flags); - TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); - - TDatabase CompileMulti( - const TVector<const char*>& regexs, - const TVector<unsigned int>& flags, - const TVector<unsigned int>& ids, - const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); + TDatabase CompileMulti( const TVector<const char*>& regexs, const TVector<unsigned int>& flags, const TVector<unsigned int>& ids, - TCPUFeatures cpuFeatures, const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + TDatabase CompileMulti( + const TVector<const char*>& regexs, + const TVector<unsigned int>& flags, + const TVector<unsigned int>& ids, + TCPUFeatures cpuFeatures, + const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + TScratch MakeScratch(const TDatabase& db); void GrowScratch(TScratch& scratch, const TDatabase& db); TScratch CloneScratch(const TScratch& scratch); - template<typename TCallback> + template<typename TCallback> void Scan( const TDatabase& db, const TScratch& scratch, const TStringBuf& text, TCallback& callback // applied to index of matched regex ) { - NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); + NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); } bool Matches( diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..7abbaa4b08 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -4,12 +4,12 @@ #include <util/generic/set.h> -#include <array> -#include <algorithm> - +#include <array> +#include <algorithm> + Y_UNIT_TEST_SUITE(HyperscanWrappers) { using namespace NHyperscan; - using namespace NHyperscan::NPrivate; + using namespace NHyperscan::NPrivate; Y_UNIT_TEST(CompileAndScan) { TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); @@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { scratch1.Reset(); UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); } - - class TSimpleSingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - } - }; - - // This regex uses AVX2 instructions on long (>70) texts. - // It crushes when compiled for machine with AVX2 and run on machine without it. - class TAvx2SingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" - "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; - unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; - return NHyperscan::Compile(regex, flags, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу.bar" - "_________________________________________________________________", - impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу" - "_________________________________________________________________", - impl)); - } - }; - - class TSimpleMultiRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::CompileMulti( - { - "foo", - "bar", - }, - { - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, - }, - { - 42, - 241, - }, - cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - - TSet<unsigned int> foundIds; - auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { - foundIds.insert(id); - }; - NHyperscan::NPrivate::Scan( - db, - scratch, - "fooBaR", - callback, - impl); - UNIT_ASSERT_EQUAL(foundIds.size(), 2); - UNIT_ASSERT(foundIds.contains(42)); - UNIT_ASSERT(foundIds.contains(241)); - } - }; - - template <class Regex> - void TestCrossPlatformCompile() { - const std::array<ERuntime, 4> runtimes = { - ERuntime::Core2, - ERuntime::Corei7, - ERuntime::AVX2, - ERuntime::AVX512 - }; - - // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. - auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); - Y_ASSERT(currentRuntimeIter != runtimes.cend()); - - for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { - auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); - Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); - } - } - - Y_UNIT_TEST(CrossPlatformCompile) { - TestCrossPlatformCompile<TSimpleSingleRegex>(); - TestCrossPlatformCompile<TAvx2SingleRegex>(); - TestCrossPlatformCompile<TSimpleMultiRegex>(); - } + + class TSimpleSingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + } + }; + + // This regex uses AVX2 instructions on long (>70) texts. + // It crushes when compiled for machine with AVX2 and run on machine without it. + class TAvx2SingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" + "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; + unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; + return NHyperscan::Compile(regex, flags, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу.bar" + "_________________________________________________________________", + impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу" + "_________________________________________________________________", + impl)); + } + }; + + class TSimpleMultiRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::CompileMulti( + { + "foo", + "bar", + }, + { + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, + }, + { + 42, + 241, + }, + cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + + TSet<unsigned int> foundIds; + auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { + foundIds.insert(id); + }; + NHyperscan::NPrivate::Scan( + db, + scratch, + "fooBaR", + callback, + impl); + UNIT_ASSERT_EQUAL(foundIds.size(), 2); + UNIT_ASSERT(foundIds.contains(42)); + UNIT_ASSERT(foundIds.contains(241)); + } + }; + + template <class Regex> + void TestCrossPlatformCompile() { + const std::array<ERuntime, 4> runtimes = { + ERuntime::Core2, + ERuntime::Corei7, + ERuntime::AVX2, + ERuntime::AVX512 + }; + + // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. + auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); + Y_ASSERT(currentRuntimeIter != runtimes.cend()); + + for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { + auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); + Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); + } + } + + Y_UNIT_TEST(CrossPlatformCompile) { + TestCrossPlatformCompile<TSimpleSingleRegex>(); + TestCrossPlatformCompile<TAvx2SingleRegex>(); + TestCrossPlatformCompile<TSimpleMultiRegex>(); + } } diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index e99130ae18..e58d93502c 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -15,5 +15,5 @@ SRCS( ) END() - -RECURSE_FOR_TESTS(ut) + +RECURSE_FOR_TESTS(ut) |