aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/regex/hyperscan/hyperscan.h
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/regex/hyperscan/hyperscan.h
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/regex/hyperscan/hyperscan.h')
-rw-r--r--library/cpp/regex/hyperscan/hyperscan.h160
1 files changed, 160 insertions, 0 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
new file mode 100644
index 0000000000..1c8f404389
--- /dev/null
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -0,0 +1,160 @@
+#pragma once
+
+#include <contrib/libs/hyperscan/src/hs.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+#include <util/system/cpu_id.h>
+
+namespace NHyperscan {
+ using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
+ constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
+ constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
+
+ template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
+ class TDeleter {
+ public:
+ template<typename T>
+ static void Destroy(T* ptr) {
+ NativeDeleter(ptr);
+ }
+ };
+
+ using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
+
+ using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
+
+ class TCompileException : public yexception {
+ };
+
+
+ namespace NPrivate {
+ enum class ERuntime {
+ Core2 = 0,
+ Corei7 = 1,
+ AVX2 = 2,
+ AVX512 = 3
+ };
+
+ ERuntime DetectCurrentRuntime();
+
+ TCPUFeatures RuntimeCpuFeatures(ERuntime runtime);
+
+ hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures);
+
+ struct TImpl {
+ hs_error_t (*AllocScratch)(const hs_database_t* db, hs_scratch_t** scratch);
+
+ hs_error_t (*Scan)(const hs_database_t* db, const char* data,
+ unsigned length, unsigned flags, hs_scratch_t* scratch,
+ match_event_handler onEvent, void* userCtx);
+
+ hs_error_t (*SerializeDatabase)(const hs_database_t* db, char** bytes, size_t* serialized_length);
+
+ hs_error_t (*DeserializeDatabase)(const char* bytes, size_t length, hs_database_t** info);
+
+ TImpl() : TImpl(DetectCurrentRuntime()) {}
+
+ explicit TImpl(ERuntime runtime);
+ };
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform);
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ hs_platform_info_t* platform,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+ // We need to parametrize Scan and Matches functions for testing purposes
+ template<typename TCallback>
+ void Scan(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ TCallback& callback, // applied to index of matched regex
+ const TImpl& impl
+ ) {
+ struct TCallbackWrapper {
+ static int EventHandler(
+ unsigned int id,
+ unsigned long long from,
+ unsigned long long to,
+ unsigned int flags,
+ void* ctx) {
+ Y_UNUSED(flags);
+ TCallback& callback2 = *reinterpret_cast<TCallback*>(ctx);
+ if constexpr (std::is_same_v<int, std::invoke_result_t<TCallback, unsigned int, unsigned long long, unsigned long long>>) {
+ return callback2(id, from, to);
+ } else {
+ callback2(id, from, to);
+ return 0;
+ }
+ }
+ };
+ unsigned int flags = 0; // unused at present
+ hs_error_t status = impl.Scan(
+ db.Get(),
+ text.begin(),
+ text.size(),
+ flags,
+ scratch.Get(),
+ &TCallbackWrapper::EventHandler,
+ &callback);
+ if (status != HS_SUCCESS && status != HS_SCAN_TERMINATED) {
+ ythrow yexception() << "Failed to scan against text: " << text;
+ }
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ const TImpl& impl);
+ }
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags);
+
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+ TDatabase CompileMulti(
+ const TVector<const char*>& regexs,
+ const TVector<unsigned int>& flags,
+ const TVector<unsigned int>& ids,
+ TCPUFeatures cpuFeatures,
+ const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
+
+ TScratch MakeScratch(const TDatabase& db);
+
+ void GrowScratch(TScratch& scratch, const TDatabase& db);
+
+ TScratch CloneScratch(const TScratch& scratch);
+
+ template<typename TCallback>
+ void Scan(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
+ TCallback& callback // applied to index of matched regex
+ ) {
+ NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text);
+
+ TString Serialize(const TDatabase& db);
+
+ TDatabase Deserialize(const TStringBuf& serialization);
+}