about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/regex/hyperscan
diff options
context:
space:
mode:
author: bnagaev <bnagaev@yandex-team.ru>, 2022-02-10 16:47:04 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru>, 2022-02-10 16:47:04 +0300
commit: d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d (patch)
tree: d5dca6d44593f5e52556a1cc7b1ab0386e096ebe /library/cpp/regex/hyperscan
parent: 1861d4c1402bb2c67a3e6b43b51706081b74508a (diff)
download: ydb-d6449ba66291ff0c0d352c82e6eb3efb4c8a7e8d.tar.gz
Restoring authorship annotation for <bnagaev@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex/hyperscan')
-rw-r--r-- library/cpp/regex/hyperscan/hyperscan.cpp 140
-rw-r--r-- library/cpp/regex/hyperscan/hyperscan.h 94
-rw-r--r-- library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp 188
-rw-r--r-- library/cpp/regex/hyperscan/ut/ya.make 14
-rw-r--r-- library/cpp/regex/hyperscan/ya.make 24
5 files changed, 230 insertions, 230 deletions
diff --git a/library/cpp/regex/hyperscan/hyperscan.cpp b/library/cpp/regex/hyperscan/hyperscan.cpp
index ba321f9c29..cb15d04ae5 100644
--- a/library/cpp/regex/hyperscan/hyperscan.cpp
+++ b/library/cpp/regex/hyperscan/hyperscan.cpp
@@ -1,5 +1,5 @@
-#include "hyperscan.h"
-
+#include "hyperscan.h"
+
#include <contrib/libs/hyperscan/runtime_core2/hs_common.h>
#include <contrib/libs/hyperscan/runtime_core2/hs_runtime.h>
#include <contrib/libs/hyperscan/runtime_corei7/hs_common.h>
@@ -11,11 +11,11 @@
#include <util/generic/singleton.h>
-namespace NHyperscan {
- using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;
-
- using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
-
+namespace NHyperscan {
+ using TSerializedDatabase = THolder<char, TDeleter<decltype(&free), &free>>;
+
+ using TCompileError = THolder<hs_compile_error_t, TDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>>;
+
namespace NPrivate {
ERuntime DetectCurrentRuntime() {
if (NX86::HaveAVX512F() && NX86::HaveAVX512BW()) {
@@ -42,12 +42,12 @@ namespace NHyperscan {
case ERuntime::AVX512:
return CPU_FEATURES_AVX512;
}
- }
-
+ }
+
hs_platform_info_t MakePlatformInfo(TCPUFeatures cpuFeatures) {
hs_platform_info_t platformInfo{HS_TUNE_FAMILY_GENERIC, cpuFeatures, 0, 0};
return platformInfo;
- }
+ }
hs_platform_info_t MakeCurrentPlatformInfo() {
return MakePlatformInfo(RuntimeCpuFeatures(DetectCurrentRuntime()));
@@ -82,7 +82,7 @@ namespace NHyperscan {
SerializeDatabase = avx512_hs_serialize_database;
DeserializeDatabase = avx512_hs_deserialize_database;
}
- }
+ }
TDatabase Compile(const TStringBuf& regex, unsigned int flags, hs_platform_info_t* platform) {
hs_database_t* rawDb = nullptr;
@@ -97,12 +97,12 @@ namespace NHyperscan {
TDatabase db(rawDb);
NHyperscan::TCompileError compileError(rawCompileErr);
if (status != HS_SUCCESS) {
- ythrow TCompileException()
+ ythrow TCompileException()
<< "Failed to compile regex: " << regex << ". "
<< "Error message (hyperscan): " << compileError->message;
- }
+ }
return db;
- }
+ }
TDatabase CompileMulti(
const TVector<const char*>& regexs,
@@ -181,8 +181,8 @@ namespace NHyperscan {
TDatabase Compile(const TStringBuf& regex, unsigned int flags) {
auto platformInfo = NPrivate::MakeCurrentPlatformInfo();
return NPrivate::Compile(regex, flags, &platformInfo);
- }
-
+ }
+
TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures) {
auto platformInfo = NPrivate::MakePlatformInfo(cpuFeatures);
return NPrivate::Compile(regex, flags, &platformInfo);
@@ -209,74 +209,74 @@ namespace NHyperscan {
return NPrivate::CompileMulti(regexs, flags, ids, &platformInfo, extendedParameters);
}
- TScratch MakeScratch(const TDatabase& db) {
- hs_scratch_t* rawScratch = nullptr;
+ TScratch MakeScratch(const TDatabase& db) {
+ hs_scratch_t* rawScratch = nullptr;
hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
- NHyperscan::TScratch scratch(rawScratch);
- if (status != HS_SUCCESS) {
- ythrow yexception() << "Failed to make scratch for hyperscan database";
- }
- return scratch;
- }
-
- void GrowScratch(TScratch& scratch, const TDatabase& db) {
- hs_scratch_t* rawScratch = scratch.Get();
+ NHyperscan::TScratch scratch(rawScratch);
+ if (status != HS_SUCCESS) {
+ ythrow yexception() << "Failed to make scratch for hyperscan database";
+ }
+ return scratch;
+ }
+
+ void GrowScratch(TScratch& scratch, const TDatabase& db) {
+ hs_scratch_t* rawScratch = scratch.Get();
hs_error_t status = Singleton<NPrivate::TImpl>()->AllocScratch(db.Get(), &rawScratch);
- if (rawScratch != scratch.Get()) {
+ if (rawScratch != scratch.Get()) {
Y_UNUSED(scratch.Release()); // freed by hs_alloc_scratch
- scratch.Reset(rawScratch);
- }
- if (status != HS_SUCCESS) {
- ythrow yexception() << "Failed to make grow scratch for hyperscan database";
- }
- }
-
- TScratch CloneScratch(const TScratch& scratch) {
- hs_scratch_t* rawScratch = nullptr;
- hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);
- TScratch scratchCopy(rawScratch);
- if (status != HS_SUCCESS) {
- ythrow yexception() << "Failed to clone scratch for hyperscan database";
- }
- return scratchCopy;
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
+ scratch.Reset(rawScratch);
+ }
+ if (status != HS_SUCCESS) {
+ ythrow yexception() << "Failed to make grow scratch for hyperscan database";
+ }
+ }
+
+ TScratch CloneScratch(const TScratch& scratch) {
+ hs_scratch_t* rawScratch = nullptr;
+ hs_error_t status = hs_clone_scratch(scratch.Get(), &rawScratch);
+ TScratch scratchCopy(rawScratch);
+ if (status != HS_SUCCESS) {
+ ythrow yexception() << "Failed to clone scratch for hyperscan database";
+ }
+ return scratchCopy;
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
const TStringBuf& text)
{
return NPrivate::Matches(db, scratch, text, *Singleton<NPrivate::TImpl>());
- }
-
+ }
+
TString Serialize(const TDatabase& db) {
- char* databaseBytes = nullptr;
- size_t databaseLength;
+ char* databaseBytes = nullptr;
+ size_t databaseLength;
hs_error_t status = Singleton<NPrivate::TImpl>()->SerializeDatabase(
- db.Get(),
- &databaseBytes,
+ db.Get(),
+ &databaseBytes,
&databaseLength);
- TSerializedDatabase serialization(databaseBytes);
- if (status != HS_SUCCESS) {
- ythrow yexception() << "Failed to serialize hyperscan database";
- }
+ TSerializedDatabase serialization(databaseBytes);
+ if (status != HS_SUCCESS) {
+ ythrow yexception() << "Failed to serialize hyperscan database";
+ }
return TString(serialization.Get(), databaseLength);
- }
-
- TDatabase Deserialize(const TStringBuf& serialization) {
- hs_database_t* rawDb = nullptr;
+ }
+
+ TDatabase Deserialize(const TStringBuf& serialization) {
+ hs_database_t* rawDb = nullptr;
hs_error_t status = Singleton<NPrivate::TImpl>()->DeserializeDatabase(
- serialization.begin(),
- serialization.size(),
+ serialization.begin(),
+ serialization.size(),
&rawDb);
- TDatabase db(rawDb);
- if (status != HS_SUCCESS) {
+ TDatabase db(rawDb);
+ if (status != HS_SUCCESS) {
if (status == HS_DB_PLATFORM_ERROR) {
ythrow yexception() << "Serialized Hyperscan database is incompatible with current CPU";
} else {
ythrow yexception() << "Failed to deserialize hyperscan database";
}
- }
- return db;
- }
-}
+ }
+ return db;
+ }
+}
diff --git a/library/cpp/regex/hyperscan/hyperscan.h b/library/cpp/regex/hyperscan/hyperscan.h
index 1c8f404389..9aabcbfdd9 100644
--- a/library/cpp/regex/hyperscan/hyperscan.h
+++ b/library/cpp/regex/hyperscan/hyperscan.h
@@ -1,34 +1,34 @@
-#pragma once
-
-#include <contrib/libs/hyperscan/src/hs.h>
-
-#include <util/generic/ptr.h>
-#include <util/generic/strbuf.h>
-#include <util/generic/vector.h>
-#include <util/generic/yexception.h>
+#pragma once
+
+#include <contrib/libs/hyperscan/src/hs.h>
+
+#include <util/generic/ptr.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
#include <util/system/cpu_id.h>
-
-namespace NHyperscan {
+
+namespace NHyperscan {
using TCPUFeatures = decltype(hs_platform_info_t::cpu_features);
constexpr TCPUFeatures CPU_FEATURES_AVX2 = HS_CPU_FEATURES_AVX2;
constexpr TCPUFeatures CPU_FEATURES_AVX512 = HS_CPU_FEATURES_AVX512 | HS_CPU_FEATURES_AVX2;
template<typename TNativeDeleter, TNativeDeleter NativeDeleter>
- class TDeleter {
- public:
+ class TDeleter {
+ public:
template<typename T>
- static void Destroy(T* ptr) {
- NativeDeleter(ptr);
- }
- };
-
- using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
-
- using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
-
+ static void Destroy(T* ptr) {
+ NativeDeleter(ptr);
+ }
+ };
+
+ using TDatabase = THolder<hs_database_t, TDeleter<decltype(&hs_free_database), &hs_free_database>>;
+
+ using TScratch = THolder<hs_scratch_t, TDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
+
class TCompileException : public yexception {
- };
-
+ };
+
namespace NPrivate {
enum class ERuntime {
@@ -116,16 +116,16 @@ namespace NHyperscan {
const TImpl& impl);
}
- TDatabase Compile(const TStringBuf& regex, unsigned int flags);
-
+ TDatabase Compile(const TStringBuf& regex, unsigned int flags);
+
TDatabase Compile(const TStringBuf& regex, unsigned int flags, TCPUFeatures cpuFeatures);
- TDatabase CompileMulti(
+ TDatabase CompileMulti(
const TVector<const char*>& regexs,
const TVector<unsigned int>& flags,
const TVector<unsigned int>& ids,
const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
-
+
TDatabase CompileMulti(
const TVector<const char*>& regexs,
const TVector<unsigned int>& flags,
@@ -133,28 +133,28 @@ namespace NHyperscan {
TCPUFeatures cpuFeatures,
const TVector<const hs_expr_ext_t*>* extendedParameters = nullptr);
- TScratch MakeScratch(const TDatabase& db);
-
- void GrowScratch(TScratch& scratch, const TDatabase& db);
-
- TScratch CloneScratch(const TScratch& scratch);
-
+ TScratch MakeScratch(const TDatabase& db);
+
+ void GrowScratch(TScratch& scratch, const TDatabase& db);
+
+ TScratch CloneScratch(const TScratch& scratch);
+
template<typename TCallback>
- void Scan(
- const TDatabase& db,
- const TScratch& scratch,
- const TStringBuf& text,
+ void Scan(
+ const TDatabase& db,
+ const TScratch& scratch,
+ const TStringBuf& text,
TCallback& callback // applied to index of matched regex
- ) {
+ ) {
NPrivate::Scan<TCallback>(db, scratch, text, callback, *Singleton<NPrivate::TImpl>());
- }
-
- bool Matches(
- const TDatabase& db,
- const TScratch& scratch,
+ }
+
+ bool Matches(
+ const TDatabase& db,
+ const TScratch& scratch,
const TStringBuf& text);
-
+
TString Serialize(const TDatabase& db);
-
- TDatabase Deserialize(const TStringBuf& serialization);
-}
+
+ TDatabase Deserialize(const TStringBuf& serialization);
+}
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
index 9caa53f2e7..28232b6982 100644
--- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
+++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp
@@ -1,125 +1,125 @@
#include <library/cpp/regex/hyperscan/hyperscan.h>
-
+
#include <library/cpp/testing/unittest/registar.h>
-
-#include <util/generic/set.h>
-
+
+#include <util/generic/set.h>
+
#include <array>
#include <algorithm>
Y_UNIT_TEST_SUITE(HyperscanWrappers) {
- using namespace NHyperscan;
+ using namespace NHyperscan;
using namespace NHyperscan::NPrivate;
-
+
Y_UNIT_TEST(CompileAndScan) {
- TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
- TScratch scratch = MakeScratch(db);
-
- unsigned int foundId = 42;
+ TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
+ TScratch scratch = MakeScratch(db);
+
+ unsigned int foundId = 42;
auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
- foundId = id;
- };
- NHyperscan::Scan(
- db,
- scratch,
- "abc",
+ foundId = id;
+ };
+ NHyperscan::Scan(
+ db,
+ scratch,
+ "abc",
callback);
- UNIT_ASSERT_EQUAL(foundId, 0);
- }
-
+ UNIT_ASSERT_EQUAL(foundId, 0);
+ }
+
Y_UNIT_TEST(Matches) {
- NHyperscan::TDatabase db = NHyperscan::Compile(
- "a.c",
+ NHyperscan::TDatabase db = NHyperscan::Compile(
+ "a.c",
HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
- UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc"));
- UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo"));
- }
-
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+ UNIT_ASSERT(NHyperscan::Matches(db, scratch, "abc"));
+ UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "foo"));
+ }
+
Y_UNIT_TEST(Multi) {
- NHyperscan::TDatabase db = NHyperscan::CompileMulti(
- {
- "foo",
- "bar",
- },
- {
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
- HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
- },
- {
- 42,
- 241,
+ NHyperscan::TDatabase db = NHyperscan::CompileMulti(
+ {
+ "foo",
+ "bar",
+ },
+ {
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH,
+ HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS,
+ },
+ {
+ 42,
+ 241,
});
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
-
- UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo"));
- UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar"));
- UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR"));
- UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO"));
-
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db);
+
+ UNIT_ASSERT(NHyperscan::Matches(db, scratch, "foo"));
+ UNIT_ASSERT(NHyperscan::Matches(db, scratch, "bar"));
+ UNIT_ASSERT(NHyperscan::Matches(db, scratch, "BAR"));
+ UNIT_ASSERT(!NHyperscan::Matches(db, scratch, "FOO"));
+
TSet<unsigned int> foundIds;
auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) {
- foundIds.insert(id);
- };
- NHyperscan::Scan(
- db,
- scratch,
- "fooBaR",
+ foundIds.insert(id);
+ };
+ NHyperscan::Scan(
+ db,
+ scratch,
+ "fooBaR",
callback);
- UNIT_ASSERT_EQUAL(foundIds.size(), 2);
+ UNIT_ASSERT_EQUAL(foundIds.size(), 2);
UNIT_ASSERT(foundIds.contains(42));
UNIT_ASSERT(foundIds.contains(241));
- }
-
- // https://ml.yandex-team.ru/thread/2370000002965712422/
+ }
+
+ // https://ml.yandex-team.ru/thread/2370000002965712422/
Y_UNIT_TEST(MultiRegression) {
- NHyperscan::CompileMulti(
- {
- "aa.bb/cc.dd",
- },
- {
- HS_FLAG_UTF8,
- },
- {
- 0,
+ NHyperscan::CompileMulti(
+ {
+ "aa.bb/cc.dd",
+ },
+ {
+ HS_FLAG_UTF8,
+ },
+ {
+ 0,
});
- }
-
+ }
+
Y_UNIT_TEST(Serialize) {
- NHyperscan::TDatabase db = NHyperscan::Compile(
- "foo",
+ NHyperscan::TDatabase db = NHyperscan::Compile(
+ "foo",
HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
TString serialization = Serialize(db);
- db.Reset();
- TDatabase db2 = Deserialize(serialization);
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2);
-
- UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo"));
- UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO"));
- }
-
+ db.Reset();
+ TDatabase db2 = Deserialize(serialization);
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db2);
+
+ UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "foo"));
+ UNIT_ASSERT(!NHyperscan::Matches(db2, scratch, "FOO"));
+ }
+
Y_UNIT_TEST(GrowScratch) {
- NHyperscan::TDatabase db1 = NHyperscan::Compile(
- "foo",
+ NHyperscan::TDatabase db1 = NHyperscan::Compile(
+ "foo",
HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
- NHyperscan::TDatabase db2 = NHyperscan::Compile(
- "longer\\w\\w\\wpattern",
+ NHyperscan::TDatabase db2 = NHyperscan::Compile(
+ "longer\\w\\w\\wpattern",
HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_UTF8);
- NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1);
- NHyperscan::GrowScratch(scratch, db2);
- UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo"));
- UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern"));
- }
-
+ NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db1);
+ NHyperscan::GrowScratch(scratch, db2);
+ UNIT_ASSERT(NHyperscan::Matches(db1, scratch, "foo"));
+ UNIT_ASSERT(NHyperscan::Matches(db2, scratch, "longerWWWpattern"));
+ }
+
Y_UNIT_TEST(CloneScratch) {
- NHyperscan::TDatabase db = NHyperscan::Compile(
- "foo",
+ NHyperscan::TDatabase db = NHyperscan::Compile(
+ "foo",
HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH);
- NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db);
- NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1);
- scratch1.Reset();
- UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));
- }
+ NHyperscan::TScratch scratch1 = NHyperscan::MakeScratch(db);
+ NHyperscan::TScratch scratch2 = NHyperscan::CloneScratch(scratch1);
+ scratch1.Reset();
+ UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo"));
+ }
class TSimpleSingleRegex {
public:
@@ -228,4 +228,4 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) {
TestCrossPlatformCompile<TAvx2SingleRegex>();
TestCrossPlatformCompile<TSimpleMultiRegex>();
}
-}
+}
diff --git a/library/cpp/regex/hyperscan/ut/ya.make b/library/cpp/regex/hyperscan/ut/ya.make
index da67b88672..c255408521 100644
--- a/library/cpp/regex/hyperscan/ut/ya.make
+++ b/library/cpp/regex/hyperscan/ut/ya.make
@@ -1,13 +1,13 @@
UNITTEST()
-
+
PEERDIR(
library/cpp/regex/hyperscan
)
OWNER(g:antiinfra)
-
-SRCS(
- hyperscan_ut.cpp
-)
-
-END()
+
+SRCS(
+ hyperscan_ut.cpp
+)
+
+END()
diff --git a/library/cpp/regex/hyperscan/ya.make b/library/cpp/regex/hyperscan/ya.make
index e99130ae18..a2ea918380 100644
--- a/library/cpp/regex/hyperscan/ya.make
+++ b/library/cpp/regex/hyperscan/ya.make
@@ -1,19 +1,19 @@
-LIBRARY()
-
+LIBRARY()
+
OWNER(g:antiinfra)
-
-PEERDIR(
- contrib/libs/hyperscan
+
+PEERDIR(
+ contrib/libs/hyperscan
contrib/libs/hyperscan/runtime_core2
contrib/libs/hyperscan/runtime_corei7
contrib/libs/hyperscan/runtime_avx2
contrib/libs/hyperscan/runtime_avx512
-)
-
-SRCS(
- hyperscan.cpp
-)
-
-END()
+)
+
+SRCS(
+ hyperscan.cpp
+)
+
+END()
RECURSE_FOR_TESTS(ut)