diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/regex/hyperscan | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/regex/hyperscan')
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 282 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 160 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 231 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/ya.make | 13 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 19 |
5 files changed, 705 insertions, 0 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
new file mode 100644
index 0000000000..ba321f9c29
--- /dev/null
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -0,0 +1,317 @@
+#include "hyperscan.h"
+
+#include <contrib/libs/hyperscan/runtime_core2/hs_common.h>
+#include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h>
+#include <contrib/libs/hyperscan/runtime_corei7/hs_common.h>
+#include <contrib/libs/hyperscan/runtime_corei7/hs_runtime.h>
+#include <contrib/libs/hyperscan/runtime_avx2/hs_common.h>
+#include <contrib/libs/hyperscan/runtime_avx2/hs_runtime.h>
+#include <contrib/libs/hyperscan/runtime_avx512/hs_common.h>
+#include <contrib/libs/hyperscan/runtime_avx512/hs_runtime.h>
+
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+
+namespace NHyperscan {
+    // Owns the byte buffer filled in by SerializeDatabase; released with free().
+    using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;
+
+    // Owns a hs_compile_error_t; released with hs_free_compile_error().
+    using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
+
+    namespace NPrivate {
+        // Picks the most capable runtime the current CPU supports, probing from
+        // AVX512 down to the Core2 fallback.
+        ERuntime DetectCurrentRuntime() {
+            if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) {
+                return ERuntime::AVX512;
+            } else if (NX86::HaveAVX() && NX86::HaveAVX2()) {
+                return ERuntime::AVX2;
+            } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) {
+                return ERuntime::Corei7;
+            } else {
+                return ERuntime::Core2;
+            }
+        }
+
+        // Maps a runtime to the cpu_features mask used when compiling for it.
+        // Core2, Corei7 and (after asserting) unknown values map to 0.
+        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {
+            switch (runtime) {
+                default:
+                    Y_ASSERT(false);
+                    [[fallthrough]];
+                case ERuntime::Core2:
+                case ERuntime::Corei7:
+                    return 0;
+                case ERuntime::AVX2:
+                    return CPU_FEATURES_AVX2;
+                case ERuntime::AVX512:
+                    return CPU_FEATURES_AVX512;
+            }
+        }
+
+        // Builds platform info with HS_TUNE_FAMILY_GENERIC tuning and the given feature mask.
+        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
+            hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
+            return platformInfo;
+        }
+
+        // Platform info matching the runtime detected on the current CPU.
+        hs_platform_info_t MakeCurrentPlatformInfo() {
+            return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
+        }
+
+        // Binds the function table to the entry points of the requested runtime.
+        // Unknown values assert and then fall back to the Core2 entry points.
+        TImpl::TImpl(ERuntime runtime) {
+            switch (runtime) {
+                default:
+                    Y_ASSERT(false);
+                    [[fallthrough]];
+                case ERuntime::Core2:
+                    AllocScratch = core2_hs_alloc_scratch;
+                    Scan = core2_hs_scan;
+                    SerializeDatabase = core2_hs_serialize_database;
+                    DeserializeDatabase = core2_hs_deserialize_database;
+                    break;
+                case ERuntime::Corei7:
+                    AllocScratch = corei7_hs_alloc_scratch;
+                    Scan = corei7_hs_scan;
+                    SerializeDatabase = corei7_hs_serialize_database;
+                    DeserializeDatabase = corei7_hs_deserialize_database;
+                    break;
+                case ERuntime::AVX2:
+                    AllocScratch = avx2_hs_alloc_scratch;
+                    Scan = avx2_hs_scan;
+                    SerializeDatabase = avx2_hs_serialize_database;
+                    DeserializeDatabase = avx2_hs_deserialize_database;
+                    break;
+                case ERuntime::AVX512:
+                    AllocScratch = avx512_hs_alloc_scratch;
+                    Scan = avx512_hs_scan;
+                    SerializeDatabase = avx512_hs_serialize_database;
+                    DeserializeDatabase = avx512_hs_deserialize_database;
+            }
+        }
+
+        // Compiles a single pattern in block mode (HS_MODE_BLOCK) for the given platform.
+        // Throws TCompileException carrying hyperscan's message on failure.
+        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
+            // hs_compile expects a NUL-terminated string, which a TStringBuf does
+            // not guarantee, so compile from an owned, terminated copy.
+            const TString pattern(regex);
+            hs_database_t* rawDb = nullptr;
+            hs_compile_error_t* rawCompileErr = nullptr;
+            hs_error_t status = hs_compile(
+                pattern.c_str(),
+                flags,
+                HS_MODE_BLOCK,
+                platform,
+                &rawDb,
+                &rawCompileErr);
+            TDatabase db(rawDb);
+            NHyperscan::TCompileError compileError(rawCompileErr);
+            if (status != HS_SUCCESS) {
+                ythrow TCompileException()
+                    << "Failed to compile regex: " << regex << ". "
+                    << "Error message (hyperscan): " << compileError->message;
+            }
+            return db;
+        }
+
+        // Compiles several patterns into one block-mode database.
+        // regexs, flags, ids and (when given) extendedParameters must have equal sizes,
+        // otherwise yexception is thrown; compile failures throw TCompileException.
+        TDatabase CompileMulti(
+            const TVector<const char*>& regexs,
+            const TVector<unsigned int>& flags,
+            const TVector<unsigned int>& ids,
+            hs_platform_info_t* platform,
+            const TVector<const hs_expr_ext_t*>* extendedParameters) {
+            unsigned int count = regexs.size();
+            if (flags.size() != count) {
+                ythrow yexception()
+                    << "Mismatch of sizes vectors passed to CompileMulti. "
+                    << "size(regexs) = " << regexs.size() << ". "
+                    << "size(flags) = " << flags.size() << ".";
+            }
+            if (ids.size() != count) {
+                ythrow yexception()
+                    << "Mismatch of sizes vectors passed to CompileMulti. "
+                    << "size(regexs) = " << regexs.size() << ". "
+                    << "size(ids) = " << ids.size() << ".";
+            }
+            if (extendedParameters && extendedParameters->size() != count) {
+                ythrow yexception()
+                    << "Mismatch of sizes vectors passed to CompileMulti. "
+                    << "size(regexs) = " << regexs.size() << ". "
+                    << "size(extendedParameters) = " << extendedParameters->size() << ".";
+            }
+            hs_database_t* rawDb = nullptr;
+            hs_compile_error_t* rawCompileErr = nullptr;
+            hs_error_t status = hs_compile_ext_multi(
+                regexs.data(),
+                flags.data(),
+                ids.data(),
+                extendedParameters ? extendedParameters->data() : nullptr,
+                count,
+                HS_MODE_BLOCK,
+                platform,
+                &rawDb,
+                &rawCompileErr);
+            TDatabase db(rawDb);
+            NHyperscan::TCompileError compileError(rawCompileErr);
+            if (status != HS_SUCCESS) {
+                if (compileError->expression >= 0) {
+                    const char* regex = regexs[compileError->expression];
+                    ythrow TCompileException()
+                        << "Failed to compile regex: " << regex << ". "
+                        << "Error message (hyperscan): " << compileError->message;
+                } else {
+                    ythrow TCompileException()
+                        << "Failed to compile multiple regexs. "
+                        << "Error message (hyperscan): " << compileError->message;
+                }
+            }
+            return db;
+        }
+
+        // Returns true if scanning text with the given runtime reports a match.
+        // The callback returns 1 so that the scan stops at the first match.
+        bool Matches(
+            const TDatabase& db,
+            const TScratch& scratch,
+            const TStringBuf& text,
+            const TImpl& impl) {
+            bool result = false;
+            auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {
+                result = true;
+                return 1; // stop scan
+            };
+            Scan(
+                db,
+                scratch,
+                text,
+                callback,
+                impl);
+            return result;
+        }
+    } // namespace NPrivate
+
+    // Compiles a single pattern for the runtime detected on the current CPU.
+    TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
+        auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
+        return NPrivate::Compile(regex, flags, &platformInfo);
+    }
+
+    // Compiles a single pattern for an explicitly given CPU feature mask.
+    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
+        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+        return NPrivate::Compile(regex, flags, &platformInfo);
+    }
+
+    // Compiles several patterns for the runtime detected on the current CPU.
+    TDatabase CompileMulti(
+        const TVector<const char*>& regexs,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<const hs_expr_ext_t*>* extendedParameters)
+    {
+        auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
+        return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
+    }
+
+    // Compiles several patterns for an explicitly given CPU feature mask.
+    TDatabase CompileMulti(
+        const TVector<const char*>& regexs,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        TCPUFeatures cpuFeatures,
+        const TVector<const hs_expr_ext_t*>* extendedParameters)
+    {
+        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
+        return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
+    }
+
+    // Allocates scratch space for db through the process-wide TImpl singleton.
+    TScratch MakeScratch(const TDatabase& db) {
+        hs_scratch_t* rawScratch = nullptr;
+        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+        NHyperscan::TScratch scratch(rawScratch);
+        if (status != HS_SUCCESS) {
+            ythrow yexception() << "Failed to make scratch for hyperscan database";
+        }
+        return scratch;
+    }
+
+    // Passes the existing scratch to AllocScratch together with db and adopts the
+    // returned pointer if it differs from the one currently held.
+    void GrowScratch(TScratch& scratch, const TDatabase& db) {
+        hs_scratch_t* rawScratch = scratch.Get();
+        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
+        if (rawScratch != scratch.Get()) {
+            Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
+            scratch.Reset(rawScratch);
+        }
+        if (status != HS_SUCCESS) {
+            ythrow yexception() << "Failed to grow scratch for hyperscan database";
+        }
+    }
+
+    // Returns an independent copy of scratch.
+    // NOTE(review): this calls the un-prefixed hs_clone_scratch rather than an entry
+    // point selected through TImpl like the other functions here; confirm it is intended.
+    TScratch CloneScratch(const TScratch& scratch) {
+        hs_scratch_t* rawScratch = nullptr;
+        hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);
+        TScratch scratchCopy(rawScratch);
+        if (status != HS_SUCCESS) {
+            ythrow yexception() << "Failed to clone scratch for hyperscan database";
+        }
+        return scratchCopy;
+    }
+
+    // Returns true if text matches db, using the process-wide TImpl singleton.
+    bool Matches(
+        const TDatabase& db,
+        const TScratch& scratch,
+        const TStringBuf& text)
+    {
+        return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
+    }
+
+    // Serializes db and returns the resulting bytes as a TString.
+    TString Serialize(const TDatabase& db) {
+        char* databaseBytes = nullptr;
+        size_t databaseLength = 0;
+        hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase(
+            db.Get(),
+            &databaseBytes,
+            &databaseLength);
+        TSerializedDatabase serialization(databaseBytes);
+        if (status != HS_SUCCESS) {
+            ythrow yexception() << "Failed to serialize hyperscan database";
+        }
+        return TString(serialization.Get(), databaseLength);
+    }
+
+    // Rebuilds a database from serialized bytes. HS_DB_PLATFORM_ERROR is reported
+    // as an incompatibility with the current CPU; other failures get a generic message.
+    TDatabase Deserialize(const TStringBuf& serialization) {
+        hs_database_t* rawDb = nullptr;
+        hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase(
+            serialization.begin(),
+            serialization.size(),
+            &rawDb);
+        TDatabase db(rawDb);
+        if (status != HS_SUCCESS) {
+            if (status == HS_DB_PLATFORM_ERROR) {
+                ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
+            } else {
+                ythrow yexception() << "Failed to deserialize hyperscan database";
+            }
+        }
+        return db;
+    }
+}
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h new
file mode 100644
index 0000000000..1c8f404389
--- /dev/null
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -0,0 +1,195 @@
+#pragma once
+
+#include <contrib/libs/hyperscan/src/hs.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/singleton.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/system/cpu_id.h>
+
+#include <limits>
+#include <type_traits>
+
+namespace NHyperscan {
+    // Feature-mask type, taken from hs_platform_info_t::cpu_features.
+    using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
+    constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
+    // The AVX512 mask also carries the AVX2 bit.
+    constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
+
+    // Deleter policy for THolder that forwards to a C-style free function.
+    template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
+    class TDeleter {
+    public:
+        template<typename T>
+        static void Destroy(T* ptr) {
+            NativeDeleter(ptr);
+        }
+    };
+
+    // Compiled pattern database; freed with hs_free_database().
+    using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
+
+    // Scratch space; freed with hs_free_scratch().
+    using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
+
+    // Thrown by Compile and CompileMulti when pattern compilation fails.
+    class TCompileException : public yexception {
+    };
+
+
+    namespace NPrivate {
+        // Hyperscan runtime variants this wrapper can dispatch to.
+        enum class ERuntime {
+            Core2 = 0,
+            Corei7 = 1,
+            AVX2 = 2,
+            AVX512 = 3
+        };
+
+        ERuntime DetectCurrentRuntime();
+
+        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
+
+        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
+
+        // Table of runtime-specific hyperscan entry points.
+        struct TImpl {
+            hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
+
+            hs_error_t (*Scan)(const hs_database_t* db, const char* data,
+                               unsigned length, unsigned flags, hs_scratch_t* scratch,
+                               match_event_handler onEvent, void* userCtx);
+
+            hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
+
+            hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
+
+            // Binds to the runtime detected on the current CPU.
+            TImpl() : TImpl(DetectCurrentRuntime()) {}
+
+            // Binds to an explicitly chosen runtime.
+            explicit TImpl(ERuntime runtime);
+        };
+
+        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
+
+        TDatabase CompileMulti(
+            const TVector<const char*>& regexs,
+            const TVector<unsigned int>& flags,
+            const TVector<unsigned int>& ids,
+            hs_platform_info_t* platform,
+            const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+        // We need to parametrize Scan and Matches functions for testing purposes.
+        // Scans text against db and calls callback(id, from, to) for each match.
+        // A callback returning int has its result passed back to hyperscan;
+        // for any other return type 0 is passed back.
+        template<typename TCallback>
+        void Scan(
+            const TDatabase& db,
+            const TScratch& scratch,
+            const TStringBuf& text,
+            TCallback& callback, // applied to index of matched regex
+            const TImpl& impl
+        ) {
+            struct TCallbackWrapper {
+                static int EventHandler(
+                    unsigned int id,
+                    unsigned long long from,
+                    unsigned long long to,
+                    unsigned int flags,
+                    void* ctx) {
+                    Y_UNUSED(flags);
+                    TCallback& callback2 = *static_cast<TCallback*>(ctx);
+                    if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
+                        return callback2(id, from, to);
+                    } else {
+                        callback2(id, from, to);
+                        return 0;
+                    }
+                }
+            };
+            // The runtime takes the length as unsigned int; refuse longer inputs
+            // instead of silently scanning a truncated prefix.
+            if (text.size() > std::numeric_limits<unsigned int>::max()) {
+                ythrow yexception() << "Text is too long for hyperscan: " << text.size() << " bytes";
+            }
+            unsigned int flags = 0; // unused at present
+            hs_error_t status = impl.Scan(
+                db.Get(),
+                text.begin(),
+                static_cast<unsigned int>(text.size()),
+                flags,
+                scratch.Get(),
+                &TCallbackWrapper::EventHandler,
+                &callback);
+            if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
+                ythrow yexception() << "Failed to scan against text: " << text;
+            }
+        }
+
+        // Returns true if scanning text with the given runtime reports a match.
+        bool Matches(
+            const TDatabase& db,
+            const TScratch& scratch,
+            const TStringBuf& text,
+            const TImpl& impl);
+    }
+
+    // Compiles a single pattern for the runtime detected on the current CPU.
+    TDatabase Compile(const TStringBuf& regex, unsigned int flags);
+
+    // Compiles a single pattern for an explicitly given CPU feature mask.
+    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
+
+    // Compiles several patterns for the runtime detected on the current CPU.
+    TDatabase CompileMulti(
+        const TVector<const char*>& regexs,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+    // Compiles several patterns for an explicitly given CPU feature mask.
+    TDatabase CompileMulti(
+        const TVector<const char*>& regexs,
+        const TVector<unsigned int>& flags,
+        const TVector<unsigned int>& ids,
+        TCPUFeatures cpuFeatures,
+        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+    // Allocates scratch space for db.
+    TScratch MakeScratch(const TDatabase& db);
+
+    // Re-allocates scratch against db, adopting a new pointer if one is returned.
+    void GrowScratch(TScratch& scratch, const TDatabase& db);
+
+    // Returns an independent copy of scratch.
+    TScratch CloneScratch(const TScratch& scratch);
+
+    // Scans text against db using the process-wide TImpl singleton.
+    template<typename TCallback>
+    void Scan(
+        const TDatabase& db,
+        const TScratch& scratch,
+        const TStringBuf& text,
+        TCallback& callback // applied to index of matched regex
+    ) {
+        NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
+    }
+
+    // Returns true if text matches db.
+    bool Matches(
+        const TDatabase& db,
+        const TScratch& scratch,
+        const TStringBuf& text);
+
+    // Serializes db and returns the resulting bytes.
+    TString Serialize(const TDatabase& db);
+
+    // Rebuilds a database from bytes produced by Serialize.
+    TDatabase Deserialize(const TStringBuf& serialization);
+}
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
new file mode 100644
index 0000000000..9caa53f2e7
--- /dev/null
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -0,0 +1,231 @@
+#include <library/cpp/regex/hyperscan/hyperscan.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/set.h>
+
+#include <array>
+#include <algorithm>
+
+Y_UNIT_TEST_SUITE(HyperscanWrappers) {
+    using namespace NHyperscan;
+    using namespace NHyperscan::NPrivate;
+
+    Y_UNIT_TEST(CompileAndScan) {
+        TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+        TScratch scratch = MakeScratch(db);
+
+        unsigned int foundId = 42;
+        auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+            foundId = id;
+        };
+        NHyperscan::Scan(
+            db,
+            scratch,
+            "abc",
+            callback);
+        UNIT_ASSERT_EQUAL(foundId, 0);
+    }
+
+    Y_UNIT_TEST(Matches) {
+        NHyperscan::TDatabase db = NHyperscan::Compile(
+            "a.c",
+            HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc"));
+        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo"));
+    }
+
+    Y_UNIT_TEST(Multi) {
+        NHyperscan::TDatabase db = NHyperscan::CompileMulti(
+            {
+                "foo",
+                "bar",
+            },
+            {
+                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
+                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
+            },
+            {
+                42,
+                241,
+            });
+        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo"));
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar"));
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR"));
+        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO"));
+
+        TSet<unsigned int> foundIds;
+        auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+            foundIds.insert(id);
+        };
+        NHyperscan::Scan(
+            db,
+            scratch,
+            "fooBaR",
+            callback);
+        UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+        UNIT_ASSERT(foundIds.contains(42));
+        UNIT_ASSERT(foundIds.contains(241));
+    }
+
+    // https://ml.yandex-team.ru/thread/2370000002965712422/
+    Y_UNIT_TEST(MultiRegression) {
+        NHyperscan::CompileMulti(
+            {
+                "aa.bb/cc.dd",
+            },
+            {
+                HS_FLAG_UTF8,
+            },
+            {
+                0,
+            });
+    }
+
+    Y_UNIT_TEST(Serialize) {
+        NHyperscan::TDatabase db = NHyperscan::Compile(
+            "foo",
+            HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+        TString serialization = Serialize(db);
+        db.Reset();
+        TDatabase db2 = Deserialize(serialization);
+        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2);
+
+        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo"));
+        UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO"));
+    }
+
+    Y_UNIT_TEST(GrowScratch) {
+        NHyperscan::TDatabase db1 = NHyperscan::Compile(
+            "foo",
+            HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+        NHyperscan::TDatabase db2 = NHyperscan::Compile(
+            "longer\\w\\w\\wpattern",
+            HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
+        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1);
+        NHyperscan::GrowScratch(scratch, db2);
+        UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo"));
+        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern"));
+    }
+
+    Y_UNIT_TEST(CloneScratch) {
+        NHyperscan::TDatabase db = NHyperscan::Compile(
+            "foo",
+            HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+        NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db);
+        NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1);
+        scratch1.Reset();
+        UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));
+    }
+
+    class TSimpleSingleRegex {
+    public:
+        static TDatabase Compile(TCPUFeatures cpuFeatures) {
+            return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);
+        }
+        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+        }
+    };
+
+    // This regex uses AVX2 instructions on long (>70) texts.
+    // It crashes when compiled for machine with AVX2 and run on machine without it.
+    class TAvx2SingleRegex {
+    public:
+        static TDatabase Compile(TCPUFeatures cpuFeatures) {
+            auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"
+                         "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";
+            unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;
+            return NHyperscan::Compile(regex, flags, cpuFeatures);
+        }
+        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+            UNIT_ASSERT(NHyperscan::NPrivate::Matches(
+                db,
+                scratch,
+                "_________________________________________________________________"
+                "фу.bar"
+                "_________________________________________________________________",
+                impl));
+            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(
+                db,
+                scratch,
+                "_________________________________________________________________"
+                "фу"
+                "_________________________________________________________________",
+                impl));
+        }
+    };
+
+    class TSimpleMultiRegex {
+    public:
+        static TDatabase Compile(TCPUFeatures cpuFeatures) {
+            return NHyperscan::CompileMulti(
+                {
+                    "foo",
+                    "bar",
+                },
+                {
+                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
+                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
+                },
+                {
+                    42,
+                    241,
+                },
+                cpuFeatures);
+        }
+        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {
+            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));
+            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));
+            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));
+            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));
+
+            TSet<unsigned int> foundIds;
+            auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
+                foundIds.insert(id);
+            };
+            NHyperscan::NPrivate::Scan(
+                db,
+                scratch,
+                "fooBaR",
+                callback,
+                impl);
+            UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+            UNIT_ASSERT(foundIds.contains(42));
+            UNIT_ASSERT(foundIds.contains(241));
+        }
+    };
+
+    template <class Regex>
+    void TestCrossPlatformCompile() {
+        const std::array<ERuntime, 4> runtimes = {
+            ERuntime::Core2,
+            ERuntime::Corei7,
+            ERuntime::AVX2,
+            ERuntime::AVX512
+        };
+
+        // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.
+        auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());
+        UNIT_ASSERT(currentRuntimeIter != runtimes.cend());
+
+        for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {
+            auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));
+            Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});
+        }
+    }
+
+    Y_UNIT_TEST(CrossPlatformCompile) {
+        TestCrossPlatformCompile<TSimpleSingleRegex>();
+        TestCrossPlatformCompile<TAvx2SingleRegex>();
+        TestCrossPlatformCompile<TSimpleMultiRegex>();
+    }
+}
diff --git a/library/cpp/regex/hyperscan/ut/ya.make b/library/cpp/regex/hyperscan/ut/ya.make
new file mode 100644
index 0000000000..da67b88672
--- /dev/null
+++ b/library/cpp/regex/hyperscan/ut/ya.make
@@ -0,0 +1,13 @@
+UNITTEST()
+
+PEERDIR(
+    library/cpp/regex/hyperscan
+)
+
+OWNER(g:antiinfra)
+
+SRCS(
+    hyperscan_ut.cpp
+)
+
+END()
diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make
new file mode 100644
index 0000000000..e99130ae18
--- /dev/null
+++ b/library/cpp/regex/hyperscan/ya.make
@@ -0,0 +1,19 @@
+LIBRARY()
+
+OWNER(g:antiinfra)
+
+PEERDIR(
+    contrib/libs/hyperscan
+    contrib/libs/hyperscan/runtime_core2
+    contrib/libs/hyperscan/runtime_corei7
+    contrib/libs/hyperscan/runtime_avx2
+    contrib/libs/hyperscan/runtime_avx512
+)
+
+SRCS(
+    hyperscan.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
|