diff options
author | mihaild <mihaild@yandex-team.ru> | 2022-02-10 16:46:59 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:59 +0300 |
commit | 246417ad6168d3f7ab4a0cf1c79ba4259f7c45ae (patch) | |
tree | 2a65611ade91c8ae2f55647107c1a11ea743abd5 /util/charset/benchmark/utf8_to_wide | |
parent | 5598c5e7bc7619bd51d87fea7b880b7788ad0b47 (diff) | |
download | ydb-246417ad6168d3f7ab4a0cf1c79ba4259f7c45ae.tar.gz |
Restoring authorship annotation for <mihaild@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset/benchmark/utf8_to_wide')
-rw-r--r-- | util/charset/benchmark/utf8_to_wide/main.cpp | 96 |
1 files changed, 48 insertions, 48 deletions
diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp index 09fa567fe5..7683b4dd15 100644 --- a/util/charset/benchmark/utf8_to_wide/main.cpp +++ b/util/charset/benchmark/utf8_to_wide/main.cpp @@ -19,33 +19,33 @@ namespace { } }; - template <size_t N> + template <size_t N> struct TRandomRuString: public TVector<char> { - inline TRandomRuString() { + inline TRandomRuString() { TVector<unsigned char> data(N * 2); - unsigned char* textEnd = data.begin(); - for (size_t i = 0; i < N; ++i) { - size_t runeLen; + unsigned char* textEnd = data.begin(); + for (size_t i = 0; i < N; ++i) { + size_t runeLen; WriteUTF8Char(RandomNumber<ui32>(0x7FF) + 1, runeLen, textEnd); - textEnd += runeLen; - } - assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd)); - } - }; - + textEnd += runeLen; + } + assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd)); + } + }; + using RAS1 = TRandomAsciiString<1>; using RAS10 = TRandomAsciiString<10>; using RAS50 = TRandomAsciiString<50>; using RAS1000 = TRandomAsciiString<1000>; using RAS1000000 = TRandomAsciiString<1000000>; - - using RRS1 = TRandomRuString<1>; - using RRS10 = TRandomRuString<10>; - using RRS1000 = TRandomRuString<1000>; - using RRS1000000 = TRandomRuString<1000000>; + + using RRS1 = TRandomRuString<1>; + using RRS10 = TRandomRuString<10>; + using RRS1000 = TRandomRuString<1000>; + using RRS1000000 = TRandomRuString<1000000>; } -#ifdef _sse2_ +#ifdef _sse2_ #define IS_ASCII_BENCHMARK(length) \ Y_CPU_BENCHMARK(IsStringASCII##length, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -65,7 +65,7 @@ namespace { Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(data.begin()), reinterpret_cast<const unsigned char*>(data.end()))); \ } \ } -#else //no sse +#else //no sse #define IS_ASCII_BENCHMARK(length) \ Y_CPU_BENCHMARK(IsStringASCIIScalar##length, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -79,33 +79,33 @@ namespace { Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \ } \ } -#endif - -IS_ASCII_BENCHMARK(1); -IS_ASCII_BENCHMARK(10); -IS_ASCII_BENCHMARK(50); -IS_ASCII_BENCHMARK(1000); -IS_ASCII_BENCHMARK(1000000); - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) { - const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); - const unsigned char* last = cur + len; - TCharType* p = dest; - - ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); - written = p - dest; - return cur - reinterpret_cast<const unsigned char*>(text); -} - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) { - return UTF8ToWideImpl(text, len, dest, written); -} - +#endif + +IS_ASCII_BENCHMARK(1); +IS_ASCII_BENCHMARK(10); +IS_ASCII_BENCHMARK(50); +IS_ASCII_BENCHMARK(1000); +IS_ASCII_BENCHMARK(1000000); + +template <bool robust, typename TCharType> +inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) { + const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); + const unsigned char* last = cur + len; + TCharType* p = dest; + + ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); + written = p - dest; + return cur - reinterpret_cast<const unsigned char*>(text); +} + +template <bool robust, typename TCharType> +inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) { + return UTF8ToWideImpl(text, len, dest, written); +} + static wchar16 WBUF_UTF16[10000000]; static wchar32 WBUF_UTF32[10000000]; - + #define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \ Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -113,8 +113,8 @@ static wchar32 WBUF_UTF32[10000000]; size_t written = 0; \ Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ } \ - } - + } + #define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \ Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \ const auto& data = *Singleton<RRS##length>(); \ @@ -122,8 +122,8 @@ static wchar32 WBUF_UTF32[10000000]; size_t written = 0; \ Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ } \ - } - + } + UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16); @@ -132,7 +132,7 @@ UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16); - + UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16); |