aboutsummaryrefslogtreecommitdiffstats
path: root/util/charset
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /util/charset
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'util/charset')
-rw-r--r--util/charset/benchmark/to_lower/main.cpp61
-rw-r--r--util/charset/benchmark/to_lower/metrics/main.py5
-rw-r--r--util/charset/benchmark/utf8_to_wide/main.cpp161
-rw-r--r--util/charset/benchmark/utf8_to_wide/metrics/main.py5
4 files changed, 0 insertions, 232 deletions
diff --git a/util/charset/benchmark/to_lower/main.cpp b/util/charset/benchmark/to_lower/main.cpp
deleted file mode 100644
index e95fdc2371..0000000000
--- a/util/charset/benchmark/to_lower/main.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <library/cpp/testing/benchmark/bench.h>
-
-#include <util/charset/wide.h>
-#include <util/generic/singleton.h>
-#include <util/generic/vector.h>
-#include <util/generic/string.h>
-
-static const auto ShortAscii = UTF8ToWide("hELlo");
-static const auto LongAscii = UTF8ToWide(
- "The first plane, plane 0, the Basic Multilingual Plane (BMP) contains characters for almost "
- "all modern languages, and a large number of symbols. A primary objective for the BMP is to "
- "support the unification of prior character sets as well as characters for writing. Most of "
- "the assigned code points in the BMP are used to encode Chinese, Japanese, and Korean (CJK) "
- "characters.");
-
-static const auto ShortRussian = UTF8ToWide("пРИвет");
-static const auto LongRussian = UTF8ToWide(
- "Плоскость 0 (Основная многоязычная плоскость, англ. Basic Multilingual Plane, BMP) отведена "
- "для символов практически всех современных письменностей и большого числа специальных символов. "
- "Большая часть таблицы занята китайско-японскими иероглифами и своеобразными корейскими"
- "буквами. В Юникоде 10.0 в этой плоскости представлены следующие блоки");
-
-#define DEFINE_INPLACE_BENCH(s) \
- Y_CPU_BENCHMARK(s##CopyDetach, iface) { \
- for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \
- NBench::Clobber(); \
- auto copy = s; \
- NBench::Escape(copy.Detach()); \
- NBench::Clobber(); \
- } \
- } \
- \
- Y_CPU_BENCHMARK(s##Inplace, iface) { \
- for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \
- NBench::Clobber(); \
- auto copy = s; \
- ToLower(copy); \
- NBench::Escape(copy.data()); \
- NBench::Clobber(); \
- } \
- }
-
-#define DEFINE_RET_BENCH(s) \
- Y_CPU_BENCHMARK(s##Ret, iface) { \
- for (size_t i = 0, iEnd = iface.Iterations(); i < iEnd; ++i) { \
- NBench::Clobber(); \
- const auto res = ToLowerRet(TWtringBuf{s}); \
- NBench::Escape(res.data()); \
- NBench::Clobber(); \
- } \
- }
-
-DEFINE_INPLACE_BENCH(ShortAscii)
-DEFINE_INPLACE_BENCH(LongAscii)
-DEFINE_INPLACE_BENCH(ShortRussian)
-DEFINE_INPLACE_BENCH(LongRussian)
-
-DEFINE_RET_BENCH(ShortAscii)
-DEFINE_RET_BENCH(LongAscii)
-DEFINE_RET_BENCH(ShortRussian)
-DEFINE_RET_BENCH(LongRussian)
diff --git a/util/charset/benchmark/to_lower/metrics/main.py b/util/charset/benchmark/to_lower/metrics/main.py
deleted file mode 100644
index e7495d432b..0000000000
--- a/util/charset/benchmark/to_lower/metrics/main.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import yatest.common as yc
-
-
-def test_export_metrics(metrics):
- metrics.set_benchmark(yc.execute_benchmark('util/charset/benchmark/to_lower/to_lower'))
diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp
deleted file mode 100644
index 09fa567fe5..0000000000
--- a/util/charset/benchmark/utf8_to_wide/main.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-#include <library/cpp/testing/benchmark/bench.h>
-
-#include <util/random/fast.h>
-#include <util/random/random.h>
-#include <util/generic/singleton.h>
-#include <util/generic/vector.h>
-#include <util/charset/wide.h>
-
-#include <cmath>
-
-namespace {
- template <size_t N>
- struct TRandomAsciiString: public TVector<char> {
- inline TRandomAsciiString() {
- reserve(N);
- for (size_t i = 0; i < N; ++i) {
- push_back(RandomNumber<char>(127));
- }
- }
- };
-
- template <size_t N>
- struct TRandomRuString: public TVector<char> {
- inline TRandomRuString() {
- TVector<unsigned char> data(N * 2);
- unsigned char* textEnd = data.begin();
- for (size_t i = 0; i < N; ++i) {
- size_t runeLen;
- WriteUTF8Char(RandomNumber<ui32>(0x7FF) + 1, runeLen, textEnd);
- textEnd += runeLen;
- }
- assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd));
- }
- };
-
- using RAS1 = TRandomAsciiString<1>;
- using RAS10 = TRandomAsciiString<10>;
- using RAS50 = TRandomAsciiString<50>;
- using RAS1000 = TRandomAsciiString<1000>;
- using RAS1000000 = TRandomAsciiString<1000000>;
-
- using RRS1 = TRandomRuString<1>;
- using RRS10 = TRandomRuString<10>;
- using RRS1000 = TRandomRuString<1000>;
- using RRS1000000 = TRandomRuString<1000000>;
-}
-
-#ifdef _sse2_
- #define IS_ASCII_BENCHMARK(length) \
- Y_CPU_BENCHMARK(IsStringASCII##length, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCII(data.begin(), data.end())); \
- } \
- } \
- Y_CPU_BENCHMARK(IsStringASCIISlow##length, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \
- } \
- } \
- Y_CPU_BENCHMARK(IsStringASCIISSE##length, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(data.begin()), reinterpret_cast<const unsigned char*>(data.end()))); \
- } \
- }
-#else //no sse
- #define IS_ASCII_BENCHMARK(length) \
- Y_CPU_BENCHMARK(IsStringASCIIScalar##length, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCII(data.begin(), data.end())); \
- } \
- } \
- Y_CPU_BENCHMARK(IsStringASCIISlow##length, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \
- } \
- }
-#endif
-
-IS_ASCII_BENCHMARK(1);
-IS_ASCII_BENCHMARK(10);
-IS_ASCII_BENCHMARK(50);
-IS_ASCII_BENCHMARK(1000);
-IS_ASCII_BENCHMARK(1000000);
-
-template <bool robust, typename TCharType>
-inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) {
- const unsigned char* cur = reinterpret_cast<const unsigned char*>(text);
- const unsigned char* last = cur + len;
- TCharType* p = dest;
-
- ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p);
- written = p - dest;
- return cur - reinterpret_cast<const unsigned char*>(text);
-}
-
-template <bool robust, typename TCharType>
-inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) {
- return UTF8ToWideImpl(text, len, dest, written);
-}
-
-static wchar16 WBUF_UTF16[10000000];
-static wchar32 WBUF_UTF32[10000000];
-
-#define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \
- Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \
- const auto& data = *Singleton<RAS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- size_t written = 0; \
- Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \
- } \
- }
-
-#define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \
- Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \
- const auto& data = *Singleton<RRS##length>(); \
- for (size_t x = 0; x < iface.Iterations(); ++x) { \
- size_t written = 0; \
- Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \
- } \
- }
-
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16);
-
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF16);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF16);
-
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF32);
-
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF32);
-UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF32);
diff --git a/util/charset/benchmark/utf8_to_wide/metrics/main.py b/util/charset/benchmark/utf8_to_wide/metrics/main.py
deleted file mode 100644
index ffbd8f68fd..0000000000
--- a/util/charset/benchmark/utf8_to_wide/metrics/main.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import yatest.common as yc
-
-
-def test_export_metrics(metrics):
- metrics.set_benchmark(yc.execute_benchmark('util/charset/benchmark/utf8_to_wide/utf8_to_wide'))