diff options
| author | jakovenko-dm <[email protected]> | 2022-02-10 16:48:06 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:06 +0300 | 
| commit | b552921bb1c7ea535f6e0e1706feb311c27d6036 (patch) | |
| tree | b222e5ac2e2e98872661c51ccceee5da0d291e13 /library/cpp/regex | |
| parent | 7077baee21e33a3ad2e790527b1c50b22c244db3 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/regex')
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.cpp | 352 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/hyperscan.h | 200 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 224 | ||||
| -rw-r--r-- | library/cpp/regex/hyperscan/ya.make | 4 | 
4 files changed, 390 insertions, 390 deletions
| diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp index 82ca3880d17..ba321f9c29c 100644 --- a/library/cpp/regex/hyperscan/hyperscan.cpp +++ b/library/cpp/regex/hyperscan/hyperscan.cpp @@ -17,201 +17,201 @@ namespace NHyperscan {      using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;      namespace NPrivate { -        ERuntime DetectCurrentRuntime() {  +        ERuntime DetectCurrentRuntime() {              if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) { -                return ERuntime::AVX512;  +                return ERuntime::AVX512;              } else if (NX86::HaveAVX() && NX86::HaveAVX2()) { -                return ERuntime::AVX2;  +                return ERuntime::AVX2;              } else if (NX86::HaveSSE42() && NX86::HavePOPCNT()) { -                return ERuntime::Corei7;  +                return ERuntime::Corei7;              } else { -                return ERuntime::Core2;  +                return ERuntime::Core2;              }          } -        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) {  -            switch (runtime) {  -                default:  -                    Y_ASSERT(false);  +        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime) { +            switch (runtime) { +                default: +                    Y_ASSERT(false);                      [[fallthrough]]; -                case ERuntime::Core2:  -                case ERuntime::Corei7:  -                    return 0;  -                case ERuntime::AVX2:  -                    return CPU_FEATURES_AVX2;  -                case ERuntime::AVX512:  -                    return CPU_FEATURES_AVX512;  -            }  +                case ERuntime::Core2: +                case ERuntime::Corei7: +                    return 0; +                case ERuntime::AVX2: +                    return CPU_FEATURES_AVX2; +                case ERuntime::AVX512: +                    return CPU_FEATURES_AVX512; +            }          } -        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {  -            hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};  -            return platformInfo;  +        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) { +            hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0}; +            return platformInfo;          } -  +          hs_platform_info_t MakeCurrentPlatformInfo() {              return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));          } -        TImpl::TImpl(ERuntime runtime) {  -            switch (runtime) {  -                default:  -                    Y_ASSERT(false);  +        TImpl::TImpl(ERuntime runtime) { +            switch (runtime) { +                default: +                    Y_ASSERT(false);                      [[fallthrough]]; -                case ERuntime::Core2:  -                    AllocScratch = core2_hs_alloc_scratch;  -                    Scan = core2_hs_scan;  -                    SerializeDatabase = core2_hs_serialize_database;  -                    DeserializeDatabase = core2_hs_deserialize_database;  -                    break;  -                case ERuntime::Corei7:  -                    AllocScratch = corei7_hs_alloc_scratch;  -                    Scan = corei7_hs_scan;  -                    SerializeDatabase = corei7_hs_serialize_database;  -                    DeserializeDatabase = corei7_hs_deserialize_database;  -                    break;  -                case ERuntime::AVX2:  -                    AllocScratch = avx2_hs_alloc_scratch;  -                    Scan = avx2_hs_scan;  -                    SerializeDatabase = avx2_hs_serialize_database;  -                    DeserializeDatabase = avx2_hs_deserialize_database;  -                    break;  -                case ERuntime::AVX512:  -                    AllocScratch = avx512_hs_alloc_scratch;  -                    Scan = avx512_hs_scan;  -                    SerializeDatabase = avx512_hs_serialize_database;  -                    DeserializeDatabase = avx512_hs_deserialize_database;  -            }  +                case ERuntime::Core2: +                    AllocScratch = core2_hs_alloc_scratch; +                    Scan = core2_hs_scan; +                    SerializeDatabase = core2_hs_serialize_database; +                    DeserializeDatabase = core2_hs_deserialize_database; +                    break; +                case ERuntime::Corei7: +                    AllocScratch = corei7_hs_alloc_scratch; +                    Scan = corei7_hs_scan; +                    SerializeDatabase = corei7_hs_serialize_database; +                    DeserializeDatabase = corei7_hs_deserialize_database; +                    break; +                case ERuntime::AVX2: +                    AllocScratch = avx2_hs_alloc_scratch; +                    Scan = avx2_hs_scan; +                    SerializeDatabase = avx2_hs_serialize_database; +                    DeserializeDatabase = avx2_hs_deserialize_database; +                    break; +                case ERuntime::AVX512: +                    AllocScratch = avx512_hs_alloc_scratch; +                    Scan = avx512_hs_scan; +                    SerializeDatabase = avx512_hs_serialize_database; +                    DeserializeDatabase = avx512_hs_deserialize_database; +            }          } -  -        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {  -            hs_database_t* rawDb = nullptr;  -            hs_compile_error_t* rawCompileErr = nullptr;  -            hs_error_t status = hs_compile(  -                    regex.begin(),  -                    flags,  -                    HS_MODE_BLOCK,  -                    platform,  -                    &rawDb,  -                    &rawCompileErr);  -            TDatabase db(rawDb);  -            NHyperscan::TCompileError compileError(rawCompileErr);  -            if (status != HS_SUCCESS) {  + +        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) { +            hs_database_t* rawDb = nullptr; +            hs_compile_error_t* rawCompileErr = nullptr; +            hs_error_t status = hs_compile( +                    regex.begin(), +                    flags, +                    HS_MODE_BLOCK, +                    platform, +                    &rawDb, +                    &rawCompileErr); +            TDatabase db(rawDb); +            NHyperscan::TCompileError compileError(rawCompileErr); +            if (status != HS_SUCCESS) {                  ythrow TCompileException() -                        << "Failed to compile regex: " << regex << ". "  -                        << "Error message (hyperscan): " << compileError->message;  +                        << "Failed to compile regex: " << regex << ". " +                        << "Error message (hyperscan): " << compileError->message; +            } +            return db; +        } + +        TDatabase CompileMulti( +                const TVector<const char*>& regexs, +                const TVector<unsigned int>& flags, +                const TVector<unsigned int>& ids, +                hs_platform_info_t* platform, +                const TVector<const hs_expr_ext_t*>* extendedParameters) { +            unsigned int count = regexs.size(); +            if (flags.size() != count) { +                ythrow yexception() +                        << "Mismatch of sizes vectors passed to CompileMulti. " +                        << "size(regexs) = " << regexs.size() << ". " +                        << "size(flags) = " << flags.size() << "."; +            } +            if (ids.size() != count) { +                ythrow yexception() +                        << "Mismatch of sizes vectors passed to CompileMulti. " +                        << "size(regexs) = " << regexs.size() << ". " +                        << "size(ids) = " << ids.size() << "."; +            } +            if (extendedParameters && extendedParameters->size() != count) { +                ythrow yexception() +                        << "Mismatch of sizes vectors passed to CompileMulti. " +                        << "size(regexs) = " << regexs.size() << ". " +                        << "size(extendedParameters) = " << extendedParameters->size() << ".";              } -            return db;  +            hs_database_t* rawDb = nullptr; +            hs_compile_error_t* rawCompileErr = nullptr; +            hs_error_t status = hs_compile_ext_multi( +                    regexs.data(), +                    flags.data(), +                    ids.data(), +                    extendedParameters ? extendedParameters->data() : nullptr, +                    count, +                    HS_MODE_BLOCK, +                    platform, +                    &rawDb, +                    &rawCompileErr); +            TDatabase db(rawDb); +            NHyperscan::TCompileError compileError(rawCompileErr); +            if (status != HS_SUCCESS) { +                if (compileError->expression >= 0) { +                    const char* regex = regexs[compileError->expression]; +                    ythrow TCompileException() +                            << "Failed to compile regex: " << regex << ". " +                            << "Error message (hyperscan): " << compileError->message; +                } else { +                    ythrow TCompileException() +                            << "Failed to compile multiple regexs. " +                            << "Error message (hyperscan): " << compileError->message; +                } +            } +            return db; +        } + +        bool Matches( +                const TDatabase& db, +                const TScratch& scratch, +                const TStringBuf& text, +                const TImpl& impl) { +            bool result = false; +            auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) { +                result = true; +                return 1; // stop scan +            }; +            Scan( +                    db, +                    scratch, +                    text, +                    callback, +                    impl); +            return result;          } -  -        TDatabase CompileMulti(  -                const TVector<const char*>& regexs,  -                const TVector<unsigned int>& flags,  -                const TVector<unsigned int>& ids,  -                hs_platform_info_t* platform,  -                const TVector<const hs_expr_ext_t*>* extendedParameters) {  -            unsigned int count = regexs.size();  -            if (flags.size() != count) {  -                ythrow yexception()  -                        << "Mismatch of sizes vectors passed to CompileMulti. "  -                        << "size(regexs) = " << regexs.size() << ". "  -                        << "size(flags) = " << flags.size() << ".";  -            }  -            if (ids.size() != count) {  -                ythrow yexception()  -                        << "Mismatch of sizes vectors passed to CompileMulti. "  -                        << "size(regexs) = " << regexs.size() << ". "  -                        << "size(ids) = " << ids.size() << ".";  -            }  -            if (extendedParameters && extendedParameters->size() != count) {  -                ythrow yexception()  -                        << "Mismatch of sizes vectors passed to CompileMulti. "  -                        << "size(regexs) = " << regexs.size() << ". "  -                        << "size(extendedParameters) = " << extendedParameters->size() << ".";  -            }  -            hs_database_t* rawDb = nullptr;  -            hs_compile_error_t* rawCompileErr = nullptr;  -            hs_error_t status = hs_compile_ext_multi(  -                    regexs.data(),  -                    flags.data(),  -                    ids.data(),  -                    extendedParameters ? extendedParameters->data() : nullptr,  -                    count,  -                    HS_MODE_BLOCK,  -                    platform,  -                    &rawDb,  -                    &rawCompileErr);  -            TDatabase db(rawDb);  -            NHyperscan::TCompileError compileError(rawCompileErr);  -            if (status != HS_SUCCESS) {  -                if (compileError->expression >= 0) {  -                    const char* regex = regexs[compileError->expression];  -                    ythrow TCompileException()  -                            << "Failed to compile regex: " << regex << ". "  -                            << "Error message (hyperscan): " << compileError->message;  -                } else {  -                    ythrow TCompileException()  -                            << "Failed to compile multiple regexs. "  -                            << "Error message (hyperscan): " << compileError->message;  -                }  -            }  -            return db;  -        }  -  -        bool Matches(  -                const TDatabase& db,  -                const TScratch& scratch,  -                const TStringBuf& text,  -                const TImpl& impl) {  -            bool result = false;  -            auto callback = [&](unsigned int /* id */, unsigned long long /* from */, unsigned long long /* to */) {  -                result = true;  -                return 1; // stop scan  -            };  -            Scan(  -                    db,  -                    scratch,  -                    text,  -                    callback,  -                    impl);  -            return result;  -        }  -    } // namespace NPrivate  -  -    TDatabase Compile(const TStringBuf& regex, unsigned int flags) {  +    } // namespace NPrivate + +    TDatabase Compile(const TStringBuf& regex, unsigned int flags) {          auto platformInfo = NPrivate::MakeCurrentPlatformInfo();          return NPrivate::Compile(regex, flags, &platformInfo);      } -    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {  -        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);  -        return NPrivate::Compile(regex, flags, &platformInfo);  -    }  -  -    TDatabase CompileMulti(  -            const TVector<const char*>& regexs,  -            const TVector<unsigned int>& flags,  -            const TVector<unsigned int>& ids,  -            const TVector<const hs_expr_ext_t*>* extendedParameters)  -    {  +    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) { +        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); +        return NPrivate::Compile(regex, flags, &platformInfo); +    } + +    TDatabase CompileMulti( +            const TVector<const char*>& regexs, +            const TVector<unsigned int>& flags, +            const TVector<unsigned int>& ids, +            const TVector<const hs_expr_ext_t*>* extendedParameters) +    {          auto platformInfo = NPrivate::MakeCurrentPlatformInfo();          return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); -    }  -  -    TDatabase CompileMulti(  -        const TVector<const char*>& regexs,  -        const TVector<unsigned int>& flags,  -        const TVector<unsigned int>& ids,  -        TCPUFeatures cpuFeatures,  -        const TVector<const hs_expr_ext_t*>* extendedParameters)  -    {  -        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);  -        return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);  -    }  -  +    } + +    TDatabase CompileMulti( +        const TVector<const char*>& regexs, +        const TVector<unsigned int>& flags, +        const TVector<unsigned int>& ids, +        TCPUFeatures cpuFeatures, +        const TVector<const hs_expr_ext_t*>* extendedParameters) +    { +        auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures); +        return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters); +    } +      TScratch MakeScratch(const TDatabase& db) {          hs_scratch_t* rawScratch = nullptr; -        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);  +        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);          NHyperscan::TScratch scratch(rawScratch);          if (status != HS_SUCCESS) {              ythrow yexception() << "Failed to make scratch for hyperscan database"; @@ -221,7 +221,7 @@ namespace NHyperscan {      void GrowScratch(TScratch& scratch, const TDatabase& db) {          hs_scratch_t* rawScratch = scratch.Get(); -        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);  +        hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);          if (rawScratch != scratch.Get()) {              Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch              scratch.Reset(rawScratch); @@ -244,9 +244,9 @@ namespace NHyperscan {      bool Matches(          const TDatabase& db,          const TScratch& scratch, -        const TStringBuf& text)  -    {  -        return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());  +        const TStringBuf& text) +    { +        return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());      }      TString Serialize(const TDatabase& db) { @@ -271,11 +271,11 @@ namespace NHyperscan {              &rawDb);          TDatabase db(rawDb);          if (status != HS_SUCCESS) { -            if (status == HS_DB_PLATFORM_ERROR) {  -                ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";  -            } else {  -                ythrow yexception() << "Failed to deserialize hyperscan database";  -            }  +            if (status == HS_DB_PLATFORM_ERROR) { +                ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU"; +            } else { +                ythrow yexception() << "Failed to deserialize hyperscan database"; +            }          }          return db;      } diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h index ef50cca08e5..1c8f4043892 100644 --- a/library/cpp/regex/hyperscan/hyperscan.h +++ b/library/cpp/regex/hyperscan/hyperscan.h @@ -9,14 +9,14 @@  #include <util/system/cpu_id.h>  namespace NHyperscan { -    using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);  -    constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;  -    constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;  -  -    template<typename TNativeDeleter, TNativeDeleter NativeDeleter>  +    using TCPUFeatures = decltype(hs_platform_info_t::cpu_features); +    constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2; +    constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2; + +    template<typename TNativeDeleter, TNativeDeleter NativeDeleter>      class TDeleter {      public: -        template<typename T>  +        template<typename T>          static void Destroy(T* ptr) {              NativeDeleter(ptr);          } @@ -26,127 +26,127 @@ namespace NHyperscan {      using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>; -    class TCompileException : public yexception {  +    class TCompileException : public yexception {      }; -  +      namespace NPrivate { -        enum class ERuntime {  -            Core2 = 0,  -            Corei7 = 1,  -            AVX2 = 2,  -            AVX512 = 3  -        };  -  -        ERuntime DetectCurrentRuntime();  -  -        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);  -  -        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);  -  +        enum class ERuntime { +            Core2 = 0, +            Corei7 = 1, +            AVX2 = 2, +            AVX512 = 3 +        }; + +        ERuntime DetectCurrentRuntime(); + +        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime); + +        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures); +          struct TImpl { -            hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);  - -            hs_error_t (*Scan)(const hs_database_t* db, const char* data,  -                                unsigned length, unsigned flags, hs_scratch_t* scratch,  -                                match_event_handler onEvent, void* userCtx);  -  -            hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);  -  -            hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);  -  -            TImpl() : TImpl(DetectCurrentRuntime()) {}  -  -            explicit TImpl(ERuntime runtime);  +            hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch); + +            hs_error_t (*Scan)(const hs_database_t* db, const char* data, +                                unsigned length, unsigned flags, hs_scratch_t* scratch, +                                match_event_handler onEvent, void* userCtx); + +            hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length); + +            hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info); + +            TImpl() : TImpl(DetectCurrentRuntime()) {} + +            explicit TImpl(ERuntime runtime);          }; -  -        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);  -  -        TDatabase CompileMulti(  -            const TVector<const char*>& regexs,  -            const TVector<unsigned int>& flags,  -            const TVector<unsigned int>& ids,  -            hs_platform_info_t* platform,  -            const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);  -  -        // We need to parametrize Scan and Matches functions for testing purposes  -        template<typename TCallback>  -        void Scan(  -            const TDatabase& db,  -            const TScratch& scratch,  -            const TStringBuf& text,  -            TCallback& callback, // applied to index of matched regex  -            const TImpl& impl  -        ) {  -            struct TCallbackWrapper {  -                static int EventHandler(  -                    unsigned int id,  -                    unsigned long long from,  -                    unsigned long long to,  -                    unsigned int flags,  -                    void* ctx) {  -                    Y_UNUSED(flags);  -                    TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);  -                    if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {  -                        return callback2(id, from, to);  -                    } else {  -                        callback2(id, from, to);  -                        return 0;  -                    }  -                }  -            };  -            unsigned int flags = 0; // unused at present  -            hs_error_t status = impl.Scan(  -                db.Get(),  -                text.begin(),  -                text.size(),  -                flags,  -                scratch.Get(),  -                &TCallbackWrapper::EventHandler,  -                &callback);  -            if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {  -                ythrow yexception() << "Failed to scan against text: " << text;  -            }  -        }  -  -        bool Matches(  -            const TDatabase& db,  -            const TScratch& scratch,  -            const TStringBuf& text,  -            const TImpl& impl);  + +        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform); + +        TDatabase CompileMulti( +            const TVector<const char*>& regexs, +            const TVector<unsigned int>& flags, +            const TVector<unsigned int>& ids, +            hs_platform_info_t* platform, +            const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); + +        // We need to parametrize Scan and Matches functions for testing purposes +        template<typename TCallback> +        void Scan( +            const TDatabase& db, +            const TScratch& scratch, +            const TStringBuf& text, +            TCallback& callback, // applied to index of matched regex +            const TImpl& impl +        ) { +            struct TCallbackWrapper { +                static int EventHandler( +                    unsigned int id, +                    unsigned long long from, +                    unsigned long long to, +                    unsigned int flags, +                    void* ctx) { +                    Y_UNUSED(flags); +                    TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx); +                    if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) { +                        return callback2(id, from, to); +                    } else { +                        callback2(id, from, to); +                        return 0; +                    } +                } +            }; +            unsigned int flags = 0; // unused at present +            hs_error_t status = impl.Scan( +                db.Get(), +                text.begin(), +                text.size(), +                flags, +                scratch.Get(), +                &TCallbackWrapper::EventHandler, +                &callback); +            if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) { +                ythrow yexception() << "Failed to scan against text: " << text; +            } +        } + +        bool Matches( +            const TDatabase& db, +            const TScratch& scratch, +            const TStringBuf& text, +            const TImpl& impl);      }      TDatabase Compile(const TStringBuf& regex, unsigned int flags); -    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);  -  +    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures); + +    TDatabase CompileMulti( +        const TVector<const char*>& regexs, +        const TVector<unsigned int>& flags, +        const TVector<unsigned int>& ids, +        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); +      TDatabase CompileMulti(          const TVector<const char*>& regexs,          const TVector<unsigned int>& flags,          const TVector<unsigned int>& ids, +        TCPUFeatures cpuFeatures,          const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr); -    TDatabase CompileMulti(  -        const TVector<const char*>& regexs,  -        const TVector<unsigned int>& flags,  -        const TVector<unsigned int>& ids,  -        TCPUFeatures cpuFeatures,  -        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);  -       TScratch MakeScratch(const TDatabase& db);      void GrowScratch(TScratch& scratch, const TDatabase& db);      TScratch CloneScratch(const TScratch& scratch); -    template<typename TCallback>  +    template<typename TCallback>      void Scan(          const TDatabase& db,          const TScratch& scratch,          const TStringBuf& text,          TCallback& callback // applied to index of matched regex      ) { -        NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());  +        NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());      }      bool Matches( diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 7abbaa4b088..9caa53f2e7f 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -4,12 +4,12 @@  #include <util/generic/set.h> -#include <array>  -#include <algorithm>  -  +#include <array> +#include <algorithm> +  Y_UNIT_TEST_SUITE(HyperscanWrappers) {      using namespace NHyperscan; -    using namespace NHyperscan::NPrivate;  +    using namespace NHyperscan::NPrivate;      Y_UNIT_TEST(CompileAndScan) {          TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); @@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {          scratch1.Reset();          UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));      } -  -    class TSimpleSingleRegex {  -    public:  -        static TDatabase Compile(TCPUFeatures cpuFeatures) {  -            return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures);  -        }  -        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {  -            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);  -            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));  -            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));  -        }  -    };  -  -    // This regex uses AVX2 instructions on long (>70) texts.  -    // It crushes when compiled for machine with AVX2 and run on machine without it.  -    class TAvx2SingleRegex {  -        public:  -        static TDatabase Compile(TCPUFeatures cpuFeatures) {  -            auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+"  -                         "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}";  -            unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY;  -            return NHyperscan::Compile(regex, flags, cpuFeatures);  -        }  -        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {  -            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);  -            UNIT_ASSERT(NHyperscan::NPrivate::Matches(  -                db,  -                scratch,  -                "_________________________________________________________________"  -                "фу.bar"  -                "_________________________________________________________________",  -                impl));  -            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(  -                db,  -                scratch,  -                "_________________________________________________________________"  -                "фу"  -                "_________________________________________________________________",  -                impl));  -        }  -    };  -  -    class TSimpleMultiRegex {  -    public:  -        static TDatabase Compile(TCPUFeatures cpuFeatures) {  -            return NHyperscan::CompileMulti(  -                {  -                    "foo",  -                    "bar",  -                },  -                {  -                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,  -                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,  -                },  -                {  -                    42,  -                    241,  -                },  -                cpuFeatures);  -        }  -        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) {  -            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);  -  -            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl));  -            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl));  -            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl));  -            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl));  -  -            TSet<unsigned int> foundIds;  -            auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {  -                foundIds.insert(id);  -            };  -            NHyperscan::NPrivate::Scan(  -                    db,  -                    scratch,  -                    "fooBaR",  -                    callback,  -                    impl);  -            UNIT_ASSERT_EQUAL(foundIds.size(), 2);  -            UNIT_ASSERT(foundIds.contains(42));  -            UNIT_ASSERT(foundIds.contains(241));  -        }  -    };  -  -    template <class Regex>  -    void TestCrossPlatformCompile() {  -        const std::array<ERuntime, 4> runtimes = {  -            ERuntime::Core2,  -            ERuntime::Corei7,  -            ERuntime::AVX2,  -            ERuntime::AVX512  -        };  -  -        // Unfortunately, we cannot emulate runtimes with more capabilities than current machine.  -        auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime());  -        Y_ASSERT(currentRuntimeIter != runtimes.cend());  -  -        for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) {  -            auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime));  -            Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime});  -        }  -    }  -  -    Y_UNIT_TEST(CrossPlatformCompile) {  -        TestCrossPlatformCompile<TSimpleSingleRegex>();  -        TestCrossPlatformCompile<TAvx2SingleRegex>();  -        TestCrossPlatformCompile<TSimpleMultiRegex>();  -    }  + +    class TSimpleSingleRegex { +    public: +        static TDatabase Compile(TCPUFeatures cpuFeatures) { +            return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); +        } +        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { +            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); +            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); +            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); +        } +    }; + +    // This regex uses AVX2 instructions on long (>70) texts. +    // It crushes when compiled for machine with AVX2 and run on machine without it. +    class TAvx2SingleRegex { +        public: +        static TDatabase Compile(TCPUFeatures cpuFeatures) { +            auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" +                         "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; +            unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; +            return NHyperscan::Compile(regex, flags, cpuFeatures); +        } +        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { +            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); +            UNIT_ASSERT(NHyperscan::NPrivate::Matches( +                db, +                scratch, +                "_________________________________________________________________" +                "фу.bar" +                "_________________________________________________________________", +                impl)); +            UNIT_ASSERT(!NHyperscan::NPrivate::Matches( +                db, +                scratch, +                "_________________________________________________________________" +                "фу" +                "_________________________________________________________________", +                impl)); +        } +    }; + +    class TSimpleMultiRegex { +    public: +        static TDatabase Compile(TCPUFeatures cpuFeatures) { +            return NHyperscan::CompileMulti( +                { +                    "foo", +                    "bar", +                }, +                { +                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, +                    HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, +                }, +                { +                    42, +                    241, +                }, +                cpuFeatures); +        } +        static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { +            NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + +            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); +            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); +            UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); +            UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + +            TSet<unsigned int> foundIds; +            auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { +                foundIds.insert(id); +            }; +            NHyperscan::NPrivate::Scan( +                    db, +                    scratch, +                    "fooBaR", +                    callback, +                    impl); +            UNIT_ASSERT_EQUAL(foundIds.size(), 2); +            UNIT_ASSERT(foundIds.contains(42)); +            UNIT_ASSERT(foundIds.contains(241)); +        } +    }; + +    template <class Regex> +    void TestCrossPlatformCompile() { +        const std::array<ERuntime, 4> runtimes = { +            ERuntime::Core2, +            ERuntime::Corei7, +            ERuntime::AVX2, +            ERuntime::AVX512 +        }; + +        // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. +        auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); +        Y_ASSERT(currentRuntimeIter != runtimes.cend()); + +        for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { +            auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); +            Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); +        } +    } + +    Y_UNIT_TEST(CrossPlatformCompile) { +        TestCrossPlatformCompile<TSimpleSingleRegex>(); +        TestCrossPlatformCompile<TAvx2SingleRegex>(); +        TestCrossPlatformCompile<TSimpleMultiRegex>(); +    }  } diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make index e58d93502c3..e99130ae186 100644 --- a/library/cpp/regex/hyperscan/ya.make +++ b/library/cpp/regex/hyperscan/ya.make @@ -15,5 +15,5 @@ SRCS(  )  END() -  -RECURSE_FOR_TESTS(ut)  + +RECURSE_FOR_TESTS(ut) | 
