diff options
| author | bnagaev <[email protected]> | 2022-02-10 16:47:04 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:47:04 +0300 | 
| commit | c74559fb88da8adac0d9186cfa55a6b13c47695f (patch) | |
| tree | b83306b6e37edeea782e9eed673d89286c4fef35 /library/cpp/regex/hyperscan | |
| parent | d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex/hyperscan')
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 140 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 94 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 188 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ut/ya.make | 14 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 24 | 
5 files changed, 230 insertions, 230 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index cb15d04ae50..ba321f9c29c 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -1,5 +1,5 @@ -#include "hyperscan.h"  -  +#include "hyperscan.h" +  #include <contrib/libs/hyperscan/runtime_core2/hs_common.h>  #include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h>  #include <contrib/libs/hyperscan/runtime_corei7/hs_common.h> @@ -11,11 +11,11 @@  #include <util/generic/singleton.h> -namespace NHyperscan {  -    using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;  -  -    using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;  -  +namespace NHyperscan { +    using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>; + +    using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>; +      namespace NPrivate {          ERuntime DetectCurrentRuntime() {              if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) { @@ -42,12 +42,12 @@ namespace NHyperscan {                  case ERuntime::AVX512:                      return CPU_FEATURES_AVX512;              } -        }  -  +        } +          hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {              hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};              return platformInfo; -        }  +        }          hs_platform_info_t MakeCurrentPlatformInfo() {              return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime())); @@ -82,7 +82,7 @@ namespace NHyperscan {                      SerializeDatabase = avx512_hs_serialize_database;                      DeserializeDatabase = avx512_hs_deserialize_database;              } -        }  +        }          TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {              hs_database_t* rawDb = nullptr; @@ -97,12 +97,12 @@ namespace NHyperscan {              TDatabase db(rawDb);              NHyperscan::TCompileError compileError(rawCompileErr);              if (status != HS_SUCCESS) { -                ythrow TCompileException()  +                ythrow TCompileException()                          << "Failed to compile regex: " << regex << ". "                          << "Error message (hyperscan): " << compileError->message; -            }  +            }              return db; -        }  +        }          TDatabase CompileMulti(                  const TVector<const char*>& regexs, @@ -181,8 +181,8 @@ namespace NHyperscan {      TDatabase Compile(const TStringBuf& regex, unsigned int flags) {          auto platformInfo = NPrivate::MakeCurrentPlatformInfo();          return NPrivate::Compile(regex, flags, &platformInfo); -    }  -  +    } +      TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {          auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);          return NPrivate::Compile(regex, flags, &platformInfo); @@ -209,74 +209,74 @@ namespace NHyperscan {          return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);      } -    TScratch MakeScratch(const TDatabase& db) {  -        hs_scratch_t* rawScratch = nullptr;  +    TScratch MakeScratch(const TDatabase& db) { +        hs_scratch_t* rawScratch = nullptr;          hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); -        NHyperscan::TScratch scratch(rawScratch);  -        if (status != HS_SUCCESS) {  -            ythrow yexception() << "Failed to make scratch for hyperscan database";  -        }  -        return scratch;  -    }  -  -    void GrowScratch(TScratch& scratch, const TDatabase& db) {  -        hs_scratch_t* rawScratch = scratch.Get();  +        NHyperscan::TScratch scratch(rawScratch); +        if (status != HS_SUCCESS) { +            ythrow yexception() << "Failed to make scratch for hyperscan database"; +        } +        return scratch; +    } + +    void GrowScratch(TScratch& scratch, const TDatabase& db) { +        hs_scratch_t* rawScratch = scratch.Get();          hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch); -        if (rawScratch != scratch.Get()) {  +        if (rawScratch != scratch.Get()) {              Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch -            scratch.Reset(rawScratch);  -        }  -        if (status != HS_SUCCESS) {  -            ythrow yexception() << "Failed to make grow scratch for hyperscan database";  -        }  -    }  -  -    TScratch CloneScratch(const TScratch& scratch) {  -        hs_scratch_t* rawScratch = nullptr;  -        hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);  -        TScratch scratchCopy(rawScratch);  -        if (status != HS_SUCCESS) {  -            ythrow yexception() << "Failed to clone scratch for hyperscan database";  -        }  -        return scratchCopy;  -    }  -  -    bool Matches(  -        const TDatabase& db,  -        const TScratch& scratch,  +            scratch.Reset(rawScratch); +        } +        if (status != HS_SUCCESS) { +            ythrow yexception() << "Failed to make grow scratch for hyperscan database"; +        } +    } + +    TScratch CloneScratch(const TScratch& scratch) { +        hs_scratch_t* rawScratch = nullptr; +        hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch); +        TScratch scratchCopy(rawScratch); +        if (status != HS_SUCCESS) { +            ythrow yexception() << "Failed to clone scratch for hyperscan database"; +        } +        return scratchCopy; +    } + +    bool Matches( +        const TDatabase& db, +        const TScratch& scratch,          const TStringBuf& text)      {          return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>()); -    }  -  +    } +      TString Serialize(const TDatabase& db) { -        char* databaseBytes = nullptr;  -        size_t databaseLength;  +        char* databaseBytes = nullptr; +        size_t databaseLength;          hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase( -            db.Get(),  -            &databaseBytes,  +            db.Get(), +            &databaseBytes,              &databaseLength); -        TSerializedDatabase serialization(databaseBytes);  -        if (status != HS_SUCCESS) {  -            ythrow yexception() << "Failed to serialize hyperscan database";  -        }  +        TSerializedDatabase serialization(databaseBytes); +        if (status != HS_SUCCESS) { +            ythrow yexception() << "Failed to serialize hyperscan database"; +        }          return TString(serialization.Get(), databaseLength); -    }  -  -    TDatabase Deserialize(const TStringBuf& serialization) {  -        hs_database_t* rawDb = nullptr;  +    } + +    TDatabase Deserialize(const TStringBuf& serialization) { +        hs_database_t* rawDb = nullptr;          hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase( -            serialization.begin(),  -            serialization.size(),  +            serialization.begin(), +            serialization.size(),              &rawDb); -        TDatabase db(rawDb);  -        if (status != HS_SUCCESS) {  +        TDatabase db(rawDb); +        if (status != HS_SUCCESS) {              if (status == HS_DB_PLATFORM_ERROR) {                  ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";              } else {                  ythrow yexception() << "Failed to deserialize hyperscan database";              } -        }  -        return db;  -    }  -}  +        } +        return db; +    } +} diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index 9aabcbfdd9f..1c8f4043892 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -1,34 +1,34 @@ -#pragma once  -  -#include <contrib/libs/hyperscan/src/hs.h>  -  -#include <util/generic/ptr.h>  -#include <util/generic/strbuf.h>  -#include <util/generic/vector.h>  -#include <util/generic/yexception.h>  +#pragma once + +#include <contrib/libs/hyperscan/src/hs.h> + +#include <util/generic/ptr.h> +#include <util/generic/strbuf.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h>  #include <util/system/cpu_id.h> -  -namespace NHyperscan {  + +namespace NHyperscan {      using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);      constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;      constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;      template<typename TNativeDeleter, TNativeDeleter NativeDeleter> -    class TDeleter {  -    public:  +    class TDeleter { +    public:          template<typename T> -        static void Destroy(T* ptr) {  -            NativeDeleter(ptr);  -        }  -    };  -  -    using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;  -  -    using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;  -  +        static void Destroy(T* ptr) { +            NativeDeleter(ptr); +        } +    }; + +    using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>; + +    using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; +      class TCompileException : public yexception { -    };  -  +    }; +      namespace NPrivate {          enum class ERuntime { @@ -116,16 +116,16 @@ namespace NHyperscan {              const TImpl& impl);      } -    TDatabase Compile(const TStringBuf& regex, unsigned int flags);  -  +    TDatabase Compile(const TStringBuf& regex, unsigned int flags); +      TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); -    TDatabase CompileMulti(  +    TDatabase CompileMulti(          const TVector<const char*>& regexs,          const TVector<unsigned int>& flags,          const TVector<unsigned int>& ids,          const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); -  +      TDatabase CompileMulti(          const TVector<const char*>& regexs,          const TVector<unsigned int>& flags, @@ -133,28 +133,28 @@ namespace NHyperscan {          TCPUFeatures cpuFeatures,          const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); -    TScratch MakeScratch(const TDatabase& db);  -  -    void GrowScratch(TScratch& scratch, const TDatabase& db);  -  -    TScratch CloneScratch(const TScratch& scratch);  -  +    TScratch MakeScratch(const TDatabase& db); + +    void GrowScratch(TScratch& scratch, const TDatabase& db); + +    TScratch CloneScratch(const TScratch& scratch); +      template<typename TCallback> -    void Scan(  -        const TDatabase& db,  -        const TScratch& scratch,  -        const TStringBuf& text,  +    void Scan( +        const TDatabase& db, +        const TScratch& scratch, +        const TStringBuf& text,          TCallback& callback // applied to index of matched regex -    ) {  +    ) {          NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>()); -    }  -  -    bool Matches(  -        const TDatabase& db,  -        const TScratch& scratch,  +    } + +    bool Matches( +        const TDatabase& db, +        const TScratch& scratch,          const TStringBuf& text); -  +      TString Serialize(const TDatabase& db); -  -    TDatabase Deserialize(const TStringBuf& serialization);  -}  + +    TDatabase Deserialize(const TStringBuf& serialization); +} diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 28232b69822..9caa53f2e7f 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -1,125 +1,125 @@  #include <library/cpp/regex/hyperscan/hyperscan.h> -  +  #include <library/cpp/testing/unittest/registar.h> -  -#include <util/generic/set.h>  -  + +#include <util/generic/set.h> +  #include <array>  #include <algorithm>  Y_UNIT_TEST_SUITE(HyperscanWrappers) { -    using namespace NHyperscan;  +    using namespace NHyperscan;      using namespace NHyperscan::NPrivate; -  +      Y_UNIT_TEST(CompileAndScan) { -        TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);  -        TScratch scratch = MakeScratch(db);  -  -        unsigned int foundId = 42;  +        TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); +        TScratch scratch = MakeScratch(db); + +        unsigned int foundId = 42;          auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { -            foundId = id;  -        };  -        NHyperscan::Scan(  -            db,  -            scratch,  -            "abc",  +            foundId = id; +        }; +        NHyperscan::Scan( +            db, +            scratch, +            "abc",              callback); -        UNIT_ASSERT_EQUAL(foundId, 0);  -    }  -  +        UNIT_ASSERT_EQUAL(foundId, 0); +    } +      Y_UNIT_TEST(Matches) { -        NHyperscan::TDatabase db = NHyperscan::Compile(  -            "a.c",  +        NHyperscan::TDatabase db = NHyperscan::Compile( +            "a.c",              HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); -        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);  -        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc"));  -        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo"));  -    }  -  +        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); +        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc")); +        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo")); +    } +      Y_UNIT_TEST(Multi) { -        NHyperscan::TDatabase db = NHyperscan::CompileMulti(  -            {  -                "foo",  -                "bar",  -            },  -            {  -                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,  -                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,  -            },  -            {  -                42,  -                241,  +        NHyperscan::TDatabase db = NHyperscan::CompileMulti( +            { +                "foo", +                "bar", +            }, +            { +                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, +                HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, +            }, +            { +                42, +                241,              }); -        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);  -  -        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo"));  -        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar"));  -        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR"));  -        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO"));  -  +        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + +        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo")); +        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar")); +        UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR")); +        UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO")); +          TSet<unsigned int> foundIds;          auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { -            foundIds.insert(id);  -        };  -        NHyperscan::Scan(  -            db,  -            scratch,  -            "fooBaR",  +            foundIds.insert(id); +        }; +        NHyperscan::Scan( +            db, +            scratch, +            "fooBaR",              callback); -        UNIT_ASSERT_EQUAL(foundIds.size(), 2);  +        UNIT_ASSERT_EQUAL(foundIds.size(), 2);          UNIT_ASSERT(foundIds.contains(42));          UNIT_ASSERT(foundIds.contains(241)); -    }  -  -    // https://ml.yandex-team.ru/thread/2370000002965712422/  +    } + +    // https://ml.yandex-team.ru/thread/2370000002965712422/      Y_UNIT_TEST(MultiRegression) { -        NHyperscan::CompileMulti(  -            {  -                "aa.bb/cc.dd",  -            },  -            {  -                HS_FLAG_UTF8,  -            },  -            {  -                0,  +        NHyperscan::CompileMulti( +            { +                "aa.bb/cc.dd", +            }, +            { +                HS_FLAG_UTF8, +            }, +            { +                0,              }); -    }  -  +    } +      Y_UNIT_TEST(Serialize) { -        NHyperscan::TDatabase db = NHyperscan::Compile(  -            "foo",  +        NHyperscan::TDatabase db = NHyperscan::Compile( +            "foo",              HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);          TString serialization = Serialize(db); -        db.Reset();  -        TDatabase db2 = Deserialize(serialization);  -        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2);  -  -        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo"));  -        UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO"));  -    }  -  +        db.Reset(); +        TDatabase db2 = Deserialize(serialization); +        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2); + +        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo")); +        UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO")); +    } +      Y_UNIT_TEST(GrowScratch) { -        NHyperscan::TDatabase db1 = NHyperscan::Compile(  -            "foo",  +        NHyperscan::TDatabase db1 = NHyperscan::Compile( +            "foo",              HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); -        NHyperscan::TDatabase db2 = NHyperscan::Compile(  -            "longer\\w\\w\\wpattern",  +        NHyperscan::TDatabase db2 = NHyperscan::Compile( +            "longer\\w\\w\\wpattern",              HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8); -        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1);  -        NHyperscan::GrowScratch(scratch, db2);  -        UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo"));  -        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern"));  -    }  -  +        NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1); +        NHyperscan::GrowScratch(scratch, db2); +        UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo")); +        UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern")); +    } +      Y_UNIT_TEST(CloneScratch) { -        NHyperscan::TDatabase db = NHyperscan::Compile(  -            "foo",  +        NHyperscan::TDatabase db = NHyperscan::Compile( +            "foo",              HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); -        NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db);  -        NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1);  -        scratch1.Reset();  -        UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));  -    }  +        NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db); +        NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1); +        scratch1.Reset(); +        UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); +    }      class TSimpleSingleRegex {      public: @@ -228,4 +228,4 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {          TestCrossPlatformCompile<TAvx2SingleRegex>();          TestCrossPlatformCompile<TSimpleMultiRegex>();      } -}  +} diff --git a/library/cpp/regex/hyperscan/ut/ya.make b/library/cpp/regex/hyperscan/ut/ya.make index c255408521a..da67b88672a 100644 --- a/library/cpp/regex/hyperscan/ut/ya.make +++ b/library/cpp/regex/hyperscan/ut/ya.make @@ -1,13 +1,13 @@  UNITTEST() -  +  PEERDIR(      library/cpp/regex/hyperscan  )  OWNER(g:antiinfra) -  -SRCS(  -    hyperscan_ut.cpp  -)  -  -END()  + +SRCS( +    hyperscan_ut.cpp +) + +END() diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index a2ea9183805..e99130ae186 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -1,19 +1,19 @@ -LIBRARY()  -  +LIBRARY() +  OWNER(g:antiinfra) -  -PEERDIR(  -    contrib/libs/hyperscan  + +PEERDIR( +    contrib/libs/hyperscan      contrib/libs/hyperscan/runtime_core2      contrib/libs/hyperscan/runtime_corei7      contrib/libs/hyperscan/runtime_avx2      contrib/libs/hyperscan/runtime_avx512 -)  -  -SRCS(  -    hyperscan.cpp  -)  -  -END()  +) + +SRCS( +    hyperscan.cpp +) + +END()  RECURSE_FOR_TESTS(ut)  | 
