diff options
author | jakovenko-dm <jakovenko-dm@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:48:06 +0300 |
commit | 7077baee21e33a3ad2e790527b1c50b22c244db3 (patch) | |
tree | e719eb81a7dbb542f49340ad8c36c65d58ac42f6 /library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | |
parent | 4282ec504ababea092138c3af45d5399d01c194a (diff) | |
download | ydb-7077baee21e33a3ad2e790527b1c50b22c244db3.tar.gz |
Restoring authorship annotation for <jakovenko-dm@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp')
-rw-r--r-- | library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp | 224 |
1 files changed, 112 insertions, 112 deletions
diff --git a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp index 9caa53f2e7..7abbaa4b08 100644 --- a/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp +++ b/library/cpp/regex/hyperscan/ut/hyperscan_ut.cpp @@ -4,12 +4,12 @@ #include <util/generic/set.h> -#include <array> -#include <algorithm> - +#include <array> +#include <algorithm> + Y_UNIT_TEST_SUITE(HyperscanWrappers) { using namespace NHyperscan; - using namespace NHyperscan::NPrivate; + using namespace NHyperscan::NPrivate; Y_UNIT_TEST(CompileAndScan) { TDatabase db = Compile("a.c", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH); @@ -120,112 +120,112 @@ Y_UNIT_TEST_SUITE(HyperscanWrappers) { scratch1.Reset(); UNIT_ASSERT(NHyperscan::Matches(db, scratch2, "foo")); } - - class TSimpleSingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - } - }; - - // This regex uses AVX2 instructions on long (>70) texts. - // It crushes when compiled for machine with AVX2 and run on machine without it. - class TAvx2SingleRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" - "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; - unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; - return NHyperscan::Compile(regex, flags, cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - UNIT_ASSERT(NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу.bar" - "_________________________________________________________________", - impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches( - db, - scratch, - "_________________________________________________________________" - "фу" - "_________________________________________________________________", - impl)); - } - }; - - class TSimpleMultiRegex { - public: - static TDatabase Compile(TCPUFeatures cpuFeatures) { - return NHyperscan::CompileMulti( - { - "foo", - "bar", - }, - { - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, - HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, - }, - { - 42, - 241, - }, - cpuFeatures); - } - static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { - NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); - - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); - UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); - UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); - - TSet<unsigned int> foundIds; - auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { - foundIds.insert(id); - }; - NHyperscan::NPrivate::Scan( - db, - scratch, - "fooBaR", - callback, - impl); - UNIT_ASSERT_EQUAL(foundIds.size(), 2); - UNIT_ASSERT(foundIds.contains(42)); - UNIT_ASSERT(foundIds.contains(241)); - } - }; - - template <class Regex> - void TestCrossPlatformCompile() { - const std::array<ERuntime, 4> runtimes = { - ERuntime::Core2, - ERuntime::Corei7, - ERuntime::AVX2, - ERuntime::AVX512 - }; - - // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. - auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); - Y_ASSERT(currentRuntimeIter != runtimes.cend()); - - for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { - auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); - Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); - } - } - - Y_UNIT_TEST(CrossPlatformCompile) { - TestCrossPlatformCompile<TSimpleSingleRegex>(); - TestCrossPlatformCompile<TAvx2SingleRegex>(); - TestCrossPlatformCompile<TSimpleMultiRegex>(); - } + + class TSimpleSingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::Compile("foo", HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + } + }; + + // This regex uses AVX2 instructions on long (>70) texts. + // It crushes when compiled for machine with AVX2 and run on machine without it. + class TAvx2SingleRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + auto regex = "[ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё]+" + "[.][\\-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz]{2,5}"; + unsigned int flags = HS_FLAG_UTF8 | HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY; + return NHyperscan::Compile(regex, flags, cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + UNIT_ASSERT(NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу.bar" + "_________________________________________________________________", + impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches( + db, + scratch, + "_________________________________________________________________" + "фу" + "_________________________________________________________________", + impl)); + } + }; + + class TSimpleMultiRegex { + public: + static TDatabase Compile(TCPUFeatures cpuFeatures) { + return NHyperscan::CompileMulti( + { + "foo", + "bar", + }, + { + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH, + HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_CASELESS, + }, + { + 42, + 241, + }, + cpuFeatures); + } + static void Check(const TDatabase& db, const NHyperscan::NPrivate::TImpl& impl) { + NHyperscan::TScratch scratch = NHyperscan::MakeScratch(db); + + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "foo", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "bar", impl)); + UNIT_ASSERT(NHyperscan::NPrivate::Matches(db, scratch, "BAR", impl)); + UNIT_ASSERT(!NHyperscan::NPrivate::Matches(db, scratch, "FOO", impl)); + + TSet<unsigned int> foundIds; + auto callback = [&](unsigned int id, unsigned long long /* from */, unsigned long long /* to */) { + foundIds.insert(id); + }; + NHyperscan::NPrivate::Scan( + db, + scratch, + "fooBaR", + callback, + impl); + UNIT_ASSERT_EQUAL(foundIds.size(), 2); + UNIT_ASSERT(foundIds.contains(42)); + UNIT_ASSERT(foundIds.contains(241)); + } + }; + + template <class Regex> + void TestCrossPlatformCompile() { + const std::array<ERuntime, 4> runtimes = { + ERuntime::Core2, + ERuntime::Corei7, + ERuntime::AVX2, + ERuntime::AVX512 + }; + + // Unfortunately, we cannot emulate runtimes with more capabilities than current machine. + auto currentRuntimeIter = std::find(runtimes.cbegin(), runtimes.cend(), DetectCurrentRuntime()); + Y_ASSERT(currentRuntimeIter != runtimes.cend()); + + for (auto targetRuntime = runtimes.cbegin(); targetRuntime <= currentRuntimeIter; ++targetRuntime) { + auto db = Regex::Compile(RuntimeCpuFeatures(*targetRuntime)); + Regex::Check(db, NHyperscan::NPrivate::TImpl{*targetRuntime}); + } + } + + Y_UNIT_TEST(CrossPlatformCompile) { + TestCrossPlatformCompile<TSimpleSingleRegex>(); + TestCrossPlatformCompile<TAvx2SingleRegex>(); + TestCrossPlatformCompile<TSimpleMultiRegex>(); + } } |