diff options
author | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
---|---|---|
committer | monster <monster@ydb.tech> | 2022-07-07 14:41:37 +0300 |
commit | 06e5c21a835c0e923506c4ff27929f34e00761c2 (patch) | |
tree | 75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /util/charset | |
parent | 03f024c4412e3aa613bb543cf1660176320ba8f4 (diff) | |
download | ydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz |
fix ya.make
Diffstat (limited to 'util/charset')
-rw-r--r-- | util/charset/benchmark/to_lower/main.cpp | 61 | ||||
-rw-r--r-- | util/charset/benchmark/to_lower/metrics/main.py | 5 | ||||
-rw-r--r-- | util/charset/benchmark/utf8_to_wide/main.cpp | 161 | ||||
-rw-r--r-- | util/charset/benchmark/utf8_to_wide/metrics/main.py | 5 |
4 files changed, 0 insertions, 232 deletions
diff --git a/util/charset/benchmark/to_lower/main.cpp b/util/charset/benchmark/to_lower/main.cpp deleted file mode 100644 index e95fdc2371..0000000000 --- a/util/charset/benchmark/to_lower/main.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include <library/cpp/testing/benchmark/bench.h> - -#include <util/charset/wide.h> -#include <util/generic/singleton.h> -#include <util/generic/vector.h> -#include <util/generic/string.h> - -static const auto ShortAscii = UTF8ToWide("hELlo"); -static const auto LongAscii = UTF8ToWide( - "The first plane, plane 0, the Basic Multilingual Plane (BMP) contains characters for almost " - "all modern languages, and a large number of symbols. A primary objective for the BMP is to " - "support the unification of prior character sets as well as characters for writing. Most of " - "the assigned code points in the BMP are used to encode Chinese, Japanese, and Korean (CJK) " - "characters."); - -static const auto ShortRussian = UTF8ToWide("пРИвет"); -static const auto LongRussian = UTF8ToWide( - "Плоскость 0 (Основная многоязычная плоскость, англ. Basic Multilingual Plane, BMP) отведена " - "для символов практически всех современных письменностей и большого числа специальных символов. " - "Большая часть таблицы занята китайско-японскими иероглифами и своеобразными корейскими" - "буквами. В Юникоде 10.0 в этой плоскости представлены следующие блоки"); - -#define DEFINE_INPLACE_BENCH(s) \ - Y_CPU_BENCHMARK(s##CopyDetach, iface) { \ - for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \ - NBench::Clobber(); \ - auto copy = s; \ - NBench::Escape(copy.Detach()); \ - NBench::Clobber(); \ - } \ - } \ - \ - Y_CPU_BENCHMARK(s##Inplace, iface) { \ - for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \ - NBench::Clobber(); \ - auto copy = s; \ - ToLower(copy); \ - NBench::Escape(copy.data()); \ - NBench::Clobber(); \ - } \ - } - -#define DEFINE_RET_BENCH(s) \ - Y_CPU_BENCHMARK(s##Ret, iface) { \ - for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \ - NBench::Clobber(); \ - const auto res = ToLowerRet(TWtringBuf{s}); \ - NBench::Escape(res.data()); \ - NBench::Clobber(); \ - } \ - } - -DEFINE_INPLACE_BENCH(ShortAscii) -DEFINE_INPLACE_BENCH(LongAscii) -DEFINE_INPLACE_BENCH(ShortRussian) -DEFINE_INPLACE_BENCH(LongRussian) - -DEFINE_RET_BENCH(ShortAscii) -DEFINE_RET_BENCH(LongAscii) -DEFINE_RET_BENCH(ShortRussian) -DEFINE_RET_BENCH(LongRussian) diff --git a/util/charset/benchmark/to_lower/metrics/main.py b/util/charset/benchmark/to_lower/metrics/main.py deleted file mode 100644 index e7495d432b..0000000000 --- a/util/charset/benchmark/to_lower/metrics/main.py +++ /dev/null @@ -1,5 +0,0 @@ -import yatest.common as yc - - -def test_export_metrics(metrics): - metrics.set_benchmark(yc.execute_benchmark('util/charset/benchmark/to_lower/to_lower')) diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp deleted file mode 100644 index 09fa567fe5..0000000000 --- a/util/charset/benchmark/utf8_to_wide/main.cpp +++ /dev/null @@ -1,161 +0,0 @@ -#include <library/cpp/testing/benchmark/bench.h> - -#include <util/random/fast.h> -#include <util/random/random.h> -#include <util/generic/singleton.h> -#include <util/generic/vector.h> -#include <util/charset/wide.h> - -#include <cmath> - -namespace { - template <size_t N> - struct TRandomAsciiString: public TVector<char> { - inline TRandomAsciiString() { - reserve(N); - for (size_t i = 0; i < N; ++i) { - push_back(RandomNumber<char>(127)); - } - } - }; - - template <size_t N> - struct TRandomRuString: public TVector<char> { - inline TRandomRuString() { - TVector<unsigned char> data(N * 2); - unsigned char* textEnd = data.begin(); - for (size_t i = 0; i < N; ++i) { - size_t runeLen; - WriteUTF8Char(RandomNumber<ui32>(0x7FF) + 1, runeLen, textEnd); - textEnd += runeLen; - } - assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd)); - } - }; - - using RAS1 = TRandomAsciiString<1>; - using RAS10 = TRandomAsciiString<10>; - using RAS50 = TRandomAsciiString<50>; - using RAS1000 = TRandomAsciiString<1000>; - using RAS1000000 = TRandomAsciiString<1000000>; - - using RRS1 = TRandomRuString<1>; - using RRS10 = TRandomRuString<10>; - using RRS1000 = TRandomRuString<1000>; - using RRS1000000 = TRandomRuString<1000000>; -} - -#ifdef _sse2_ - #define IS_ASCII_BENCHMARK(length) \ - Y_CPU_BENCHMARK(IsStringASCII##length, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCII(data.begin(), data.end())); \ - } \ - } \ - Y_CPU_BENCHMARK(IsStringASCIISlow##length, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \ - } \ - } \ - Y_CPU_BENCHMARK(IsStringASCIISSE##length, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(data.begin()), reinterpret_cast<const unsigned char*>(data.end()))); \ - } \ - } -#else //no sse - #define IS_ASCII_BENCHMARK(length) \ - Y_CPU_BENCHMARK(IsStringASCIIScalar##length, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCII(data.begin(), data.end())); \ - } \ - } \ - Y_CPU_BENCHMARK(IsStringASCIISlow##length, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \ - } \ - } -#endif - -IS_ASCII_BENCHMARK(1); -IS_ASCII_BENCHMARK(10); -IS_ASCII_BENCHMARK(50); -IS_ASCII_BENCHMARK(1000); -IS_ASCII_BENCHMARK(1000000); - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) { - const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); - const unsigned char* last = cur + len; - TCharType* p = dest; - - ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); - written = p - dest; - return cur - reinterpret_cast<const unsigned char*>(text); -} - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) { - return UTF8ToWideImpl(text, len, dest, written); -} - -static wchar16 WBUF_UTF16[10000000]; -static wchar32 WBUF_UTF32[10000000]; - -#define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \ - Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - size_t written = 0; \ - Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ - } \ - } - -#define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \ - Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \ - const auto& data = *Singleton<RRS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - size_t written = 0; \ - Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ - } \ - } - -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF16); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF32); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF32); diff --git a/util/charset/benchmark/utf8_to_wide/metrics/main.py b/util/charset/benchmark/utf8_to_wide/metrics/main.py deleted file mode 100644 index ffbd8f68fd..0000000000 --- a/util/charset/benchmark/utf8_to_wide/metrics/main.py +++ /dev/null @@ -1,5 +0,0 @@ -import yatest.common as yc - - -def test_export_metrics(metrics): - metrics.set_benchmark(yc.execute_benchmark('util/charset/benchmark/utf8_to_wide/utf8_to_wide')) |