diff options
author | bnagaev <bnagaev@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:04 +0300 |
commit | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch) | |
tree | d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /library/cpp/regex/hyperscan | |
parent | 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff) | |
download | ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz |
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex/hyperscan')
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 140 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 94 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 188 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ut/ya.make | 14 | ||||
-rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 24 |
5 files changed, 230 insertions, 230 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index ba321f9c29..cb15d04ae5 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -1,5 +1,5 @@ -#include "hyperscan.h" - +#include "hyperscan.h" + #include <contrib/libs/hyperscan/runtime_core2/hs_common.h> #include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h> #include <contrib/libs/hyperscan/runtime_corei7/hs_common.h> @@ -11,11 +11,11 @@ #include <util/generic/singleton.h> -namespace NHyperscan { - using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>; - - using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>; - +namespace NHyperscan { + using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>; + + using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>; + namespace NPrivate { ERuntime DetectCurrentRuntime() { if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) { @@ -42,12 +42,12 @@ namespace NHyperscan { case ERuntime::AVX512: return CPU_FEATURES_AVX512; } - } - + } + hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; return platformInfo; - } + } hs_platform_info_t MakeCurrentPlatformInfo() { return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime())); @@ -82,7 +82,7 @@ namespace NHyperscan { SerializeDatabase = avx512_hs_serialize_database; DeserializeDatabase = avx512_hs_deserialize_database; } - } + } TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { hs_database_t* rawDb = nullptr; @@ -97,12 +97,12 @@ namespace NHyperscan { TDatabase db(rawDb); NHyperscan::TCompileError compileError(rawCompileErr); if (status != HS_SUCCESS) { - ythrow TCompileException() + ythrow TCompileException() << "Failed to compile regex: " << regex << ". " << "Error message (hyperscan): " << compileError->message; - } + } return db; - } + } TDatabase CompileMulti( const TVector<const char*>& regexs, @@ -181,8 +181,8 @@ namespace NHyperscan { TDatabase Compile(const TStringBuf& regex, unsigned int flags) { auto platformInfo = NPrivate::MakeCurrentPlatformInfo(); return NPrivate::Compile(regex, flags, &platformInfo); - } - + } + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); return NPrivate::Compile(regex, flags, &platformInfo); @@ -209,74 +209,74 @@ namespace NHyperscan { return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); } - TScratch MakeScratch(const TDatabase& db) { - hs_scratch_t* rawScratch = nullptr; + TScratch MakeScratch(const TDatabase& db) { + hs_scratch_t* rawScratch = nullptr; hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); - NHyperscan::TScratch scratch(rawScratch); - if (status != HS_SUCCESS) { - ythrow yexception() << "Failed to make scratch for hyperscan database"; - } - return scratch; - } - - void GrowScratch(TScratch& scratch, const TDatabase& db) { - hs_scratch_t* rawScratch = scratch.Get(); + NHyperscan::TScratch scratch(rawScratch); + if (status != HS_SUCCESS) { + ythrow yexception() << "Failed to make scratch for hyperscan database"; + } + return scratch; + } + + void GrowScratch(TScratch& scratch, const TDatabase& db) { + hs_scratch_t* rawScratch = scratch.Get(); hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); - if (rawScratch != scratch.Get()) { + if (rawScratch != scratch.Get()) { Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch - scratch.Reset(rawScratch); - } - if (status != HS_SUCCESS) { - ythrow yexception() << "Failed to make grow scratch for hyperscan database"; - } - } - - TScratch CloneScratch(const TScratch& scratch) { - hs_scratch_t* rawScratch = nullptr; - hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch); - TScratch scratchCopy(rawScratch); - if (status != HS_SUCCESS) { - ythrow yexception() << "Failed to clone scratch for hyperscan database"; - } - return scratchCopy; - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, + scratch.Reset(rawScratch); + } + if (status != HS_SUCCESS) { + ythrow yexception() << "Failed to make grow scratch for hyperscan database"; + } + } + + TScratch CloneScratch(const TScratch& scratch) { + hs_scratch_t* rawScratch = nullptr; + hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch); + TScratch scratchCopy(rawScratch); + if (status != HS_SUCCESS) { + ythrow yexception() << "Failed to clone scratch for hyperscan database"; + } + return scratchCopy; + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, const TStringBuf& text) { return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); - } - + } + TString Serialize(const TDatabase& db) { - char* databaseBytes = nullptr; - size_t databaseLength; + char* databaseBytes = nullptr; + size_t databaseLength; hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase( - db.Get(), - &databaseBytes, + db.Get(), + &databaseBytes, &databaseLength); - TSerializedDatabase serialization(databaseBytes); - if (status != HS_SUCCESS) { - ythrow yexception() << "Failed to serialize hyperscan database"; - } + TSerializedDatabase serialization(databaseBytes); + if (status != HS_SUCCESS) { + ythrow yexception() << "Failed to serialize hyperscan database"; + } return TString(serialization.Get(), databaseLength); - } - - TDatabase Deserialize(const TStringBuf& serialization) { - hs_database_t* rawDb = nullptr; + } + + TDatabase Deserialize(const TStringBuf& serialization) { + hs_database_t* rawDb = nullptr; hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase( - serialization.begin(), - serialization.size(), + serialization.begin(), + serialization.size(), &rawDb); - TDatabase db(rawDb); - if (status != HS_SUCCESS) { + TDatabase db(rawDb); + if (status != HS_SUCCESS) { if (status == HS_DB_PLATFORM_ERROR) { ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; } else { ythrow yexception() << "Failed to deserialize hyperscan database"; } - } - return db; - } -} + } + return db; + } +} diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 1c8f404389..9aabcbfdd9 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -1,34 +1,34 @@ -#pragma once - -#include <contrib/libs/hyperscan/src/hs.h> - -#include <util/generic/ptr.h> -#include <util/generic/strbuf.h> -#include <util/generic/vector.h> -#include <util/generic/yexception.h> +#pragma once + +#include <contrib/libs/hyperscan/src/hs.h> + +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> #include <util/system/cpu_id.h> - -namespace NHyperscan { + +namespace NHyperscan { using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; template<typename TNativeDeleter, TNativeDeleter NativeDeleter> - class TDeleter { - public: + class TDeleter { + public: template<typename T> - static void Destroy(T* ptr) { - NativeDeleter(ptr); - } - }; - - using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>; - - using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; - + static void Destroy(T* ptr) { + NativeDeleter(ptr); + } + }; + + using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>; + + using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; + class TCompileException : public yexception { - }; - + }; + namespace NPrivate { enum class ERuntime { @@ -116,16 +116,16 @@ namespace NHyperscan { const TImpl& impl); } - TDatabase Compile(const TStringBuf& regex, unsigned int flags); - + TDatabase Compile(const TStringBuf& regex, unsigned int flags); + TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); - TDatabase CompileMulti( + TDatabase CompileMulti( const TVector<const char*>& regexs, const TVector<unsigned int>& flags, const TVector<unsigned int>& ids, const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - + TDatabase CompileMulti( const TVector<const char*>& regexs, const TVector<unsigned int>& flags, @@ -133,28 +133,28 @@ namespace NHyperscan { TCPUFeatures cpuFeatures, const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); - TScratch MakeScratch(const TDatabase& db); - - void GrowScratch(TScratch& scratch, const TDatabase& db); - - TScratch CloneScratch(const TScratch& scratch); - + TScratch MakeScratch(const TDatabase& db); + + void GrowScratch(TScratch& scratch, const TDatabase& db); + + TScratch CloneScratch(const TScratch& scratch); + template<typename TCallback> - void Scan( - const TDatabase& db, - const TScratch& scratch, - const TStringBuf& text, + void Scan( + const TDatabase& db, + const TScratch& scratch, + const TStringBuf& text, TCallback& callback // applied to index of matched regex - ) { + ) { NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); - } - - bool Matches( - const TDatabase& db, - const TScratch& scratch, + } + + bool Matches( + const TDatabase& db, + const TScratch& scratch, const TStringBuf& text); - + TString Serialize(const TDatabase& db); - - TDatabase Deserialize(const TStringBuf& serialization); -} + + TDatabase Deserialize(const TStringBuf& serialization); +} diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..28232b6982 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -1,125 +1,125 @@ #include <library/cpp/regex/hyperscan/hyperscan.h> - + #include <library/cpp/testing/unittest/registar.h> - -#include <util/generic/set.h> - + +#include <util/generic/set.h> + #include <array> #include <algorithm> Y_UNIT_TEST_SUITE(HyperscanWrappers) { - using namespace NHyperscan; + using namespace NHyperscan; using namespace NHyperscan::NPrivate; - + Y_UNIT_TEST(CompileAndScan) { - TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); - TScratch scratch = MakeScratch(db); - - unsigned int foundId = 42; + TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); + TScratch scratch = MakeScratch(db); + + unsigned int foundId = 42; auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { - foundId = id; - }; - NHyperscan::Scan( - db, - scratch, - "abc", + foundId = id; + }; + NHyperscan::Scan( + db, + scratch, + "abc", callback); - UNIT_ASSERT_EQUAL(foundId, 0); - } - + UNIT_ASSERT_EQUAL(foundId, 0); + } + Y_UNIT_TEST(Matches) { - NHyperscan::TDatabase db = NHyperscan::Compile( - "a.c", + NHyperscan::TDatabase db = NHyperscan::Compile( + "a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc")); - UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo")); - } - + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc")); + UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo")); + } + Y_UNIT_TEST(Multi) { - NHyperscan::TDatabase db = NHyperscan::CompileMulti( - { - "foo", - "bar", - }, - { - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, - }, - { - 42, - 241, + NHyperscan::TDatabase db = NHyperscan::CompileMulti( + { + "foo", + "bar", + }, + { + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, + }, + { + 42, + 241, }); - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - - UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo")); - UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar")); - UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR")); - UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO")); - + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + + UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo")); + UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar")); + UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR")); + UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO")); + TSet<unsigned int> foundIds; auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { - foundIds.insert(id); - }; - NHyperscan::Scan( - db, - scratch, - "fooBaR", + foundIds.insert(id); + }; + NHyperscan::Scan( + db, + scratch, + "fooBaR", callback); - UNIT_ASSERT_EQUAL(foundIds.size(), 2); + UNIT_ASSERT_EQUAL(foundIds.size(), 2); UNIT_ASSERT(foundIds.contains(42)); UNIT_ASSERT(foundIds.contains(241)); - } - - // https://ml.yandex-team.ru/thread/2370000002965712422/ + } + + // https://ml.yandex-team.ru/thread/2370000002965712422/ Y_UNIT_TEST(MultiRegression) { - NHyperscan::CompileMulti( - { - "aa.bb/cc.dd", - }, - { - HS_FLAG_UTF8, - }, - { - 0, + NHyperscan::CompileMulti( + { + "aa.bb/cc.dd", + }, + { + HS_FLAG_UTF8, + }, + { + 0, }); - } - + } + Y_UNIT_TEST(Serialize) { - NHyperscan::TDatabase db = NHyperscan::Compile( - "foo", + NHyperscan::TDatabase db = NHyperscan::Compile( + "foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); TString serialization = Serialize(db); - db.Reset(); - TDatabase db2 = Deserialize(serialization); - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2); - - UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo")); - UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO")); - } - + db.Reset(); + TDatabase db2 = Deserialize(serialization); + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2); + + UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo")); + UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO")); + } + Y_UNIT_TEST(GrowScratch) { - NHyperscan::TDatabase db1 = NHyperscan::Compile( - "foo", + NHyperscan::TDatabase db1 = NHyperscan::Compile( + "foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); - NHyperscan::TDatabase db2 = NHyperscan::Compile( - "longer\\w\\w\\wpattern", + NHyperscan::TDatabase db2 = NHyperscan::Compile( + "longer\\w\\w\\wpattern", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8); - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1); - NHyperscan::GrowScratch(scratch, db2); - UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo")); - UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern")); - } - + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1); + NHyperscan::GrowScratch(scratch, db2); + UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo")); + UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern")); + } + Y_UNIT_TEST(CloneScratch) { - NHyperscan::TDatabase db = NHyperscan::Compile( - "foo", + NHyperscan::TDatabase db = NHyperscan::Compile( + "foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); - NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db); - NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1); - scratch1.Reset(); - UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); - } + NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db); + NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1); + scratch1.Reset(); + UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); + } class TSimpleSingleRegex { public: @@ -228,4 +228,4 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { TestCrossPlatformCompile<TAvx2SingleRegex>(); TestCrossPlatformCompile<TSimpleMultiRegex>(); } -} +} diff --git a/library/cpp/regex/hyperscan/ut/ya.make b/library/cpp/regex/hyperscan/ut/ya.make index da67b88672..c255408521 100644 --- a/library/cpp/regex/hyperscan/ut/ya.make +++ b/library/cpp/regex/hyperscan/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST() - + PEERDIR( library/cpp/regex/hyperscan ) OWNER(g:antiinfra) - -SRCS( - hyperscan_ut.cpp -) - -END() + +SRCS( + hyperscan_ut.cpp +) + +END() diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index e99130ae18..a2ea918380 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - +LIBRARY() + OWNER(g:antiinfra) - -PEERDIR( - contrib/libs/hyperscan + +PEERDIR( + contrib/libs/hyperscan contrib/libs/hyperscan/runtime_core2 contrib/libs/hyperscan/runtime_corei7 contrib/libs/hyperscan/runtime_avx2 contrib/libs/hyperscan/runtime_avx512 -) - -SRCS( - hyperscan.cpp -) - -END() +) + +SRCS( + hyperscan.cpp +) + +END() RECURSE_FOR_TESTS(ut) |