aboutsummaryrefslogblamecommitdiffstats
path: root/library/cpp/regex/hyperscan/hyperscan.h
blob: 1c8f4043892e31a9647d6649438863a14002c91f (plain) (tree)
1
2
3
4
5
6
7
8
9






                                          
                               
                      



                                                                                               
                    
                            







                                                                                                       
                                                 
      
 
                        











                                                                      
                      











                                                                                                              
          





















































                                                                                                                                               
     
                                                                   



                                                                                             
                                                                           
 


                                           
                                 
                                                                           




                                                             
                                


                                
                                                                
       
                                                                                              



                                
                                
 
                                           

                                                           
#pragma once

#include <contrib/libs/hyperscan/src/hs.h>

#include <util/generic/ptr.h>
#include <util/generic/strbuf.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
#include <util/system/cpu_id.h>

namespace NHyperscan {
    using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
    constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
    constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;

    template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
    class TDeleter {
    public:
        template<typename T>
        static void Destroy(T* ptr) {
            NativeDeleter(ptr);
        }
    };

    using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;

    using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;

    class TCompileException : public yexception {
    };


    namespace NPrivate {
        enum class ERuntime {
            Core2 = 0,
            Corei7 = 1,
            AVX2 = 2,
            AVX512 = 3
        };

        ERuntime DetectCurrentRuntime();

        TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);

        hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);

        struct TImpl {
            hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);

            hs_error_t (*Scan)(const hs_database_t* db, const char* data,
                                unsigned length, unsigned flags, hs_scratch_t* scratch,
                                match_event_handler onEvent, void* userCtx);

            hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);

            hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);

            TImpl() : TImpl(DetectCurrentRuntime()) {}

            explicit TImpl(ERuntime runtime);
        };

        TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);

        TDatabase CompileMulti(
            const TVector<const char*>& regexs,
            const TVector<unsigned int>& flags,
            const TVector<unsigned int>& ids,
            hs_platform_info_t* platform,
            const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);

        // We need to parametrize Scan and Matches functions for testing purposes
        template<typename TCallback>
        void Scan(
            const TDatabase& db,
            const TScratch& scratch,
            const TStringBuf& text,
            TCallback& callback, // applied to index of matched regex
            const TImpl& impl
        ) {
            struct TCallbackWrapper {
                static int EventHandler(
                    unsigned int id,
                    unsigned long long from,
                    unsigned long long to,
                    unsigned int flags,
                    void* ctx) {
                    Y_UNUSED(flags);
                    TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
                    if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
                        return callback2(id, from, to);
                    } else {
                        callback2(id, from, to);
                        return 0;
                    }
                }
            };
            unsigned int flags = 0; // unused at present
            hs_error_t status = impl.Scan(
                db.Get(),
                text.begin(),
                text.size(),
                flags,
                scratch.Get(),
                &TCallbackWrapper::EventHandler,
                &callback);
            if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
                ythrow yexception() << "Failed to scan against text: " << text;
            }
        }

        bool Matches(
            const TDatabase& db,
            const TScratch& scratch,
            const TStringBuf& text,
            const TImpl& impl);
    }

    TDatabase Compile(const TStringBuf& regex, unsigned int flags);

    TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);

    TDatabase CompileMulti(
        const TVector<const char*>& regexs,
        const TVector<unsigned int>& flags,
        const TVector<unsigned int>& ids,
        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);

    TDatabase CompileMulti(
        const TVector<const char*>& regexs,
        const TVector<unsigned int>& flags,
        const TVector<unsigned int>& ids,
        TCPUFeatures cpuFeatures,
        const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);

    TScratch MakeScratch(const TDatabase& db);

    void GrowScratch(TScratch& scratch, const TDatabase& db);

    TScratch CloneScratch(const TScratch& scratch);

    template<typename TCallback>
    void Scan(
        const TDatabase& db,
        const TScratch& scratch,
        const TStringBuf& text,
        TCallback& callback // applied to index of matched regex
    ) {
        NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
    }

    bool Matches(
        const TDatabase& db,
        const TScratch& scratch,
        const TStringBuf& text);

    TString Serialize(const TDatabase& db);

    TDatabase Deserialize(const TStringBuf& serialization);
}