diff options
author | Ilnur Khuziev <ilnur.khuziev@yandex.ru> | 2022-02-10 16:46:13 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:13 +0300 |
commit | 736dcd8ca259457a136f2f9f9168c44643914323 (patch) | |
tree | ddd46a036d68bfa83aa11b892f31243ea6b068a1 /util/charset | |
parent | 9bf2fa2b060c9881d3135c2208c624a1dd546ecc (diff) | |
download | ydb-736dcd8ca259457a136f2f9f9168c44643914323.tar.gz |
Restoring authorship annotation for Ilnur Khuziev <ilnur.khuziev@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset')
-rw-r--r-- | util/charset/benchmark/to_lower/main.cpp | 2 | ||||
-rw-r--r-- | util/charset/benchmark/utf8_to_wide/main.cpp | 104 | ||||
-rw-r--r-- | util/charset/utf8.h | 2 | ||||
-rw-r--r-- | util/charset/utf8_ut.cpp | 4 | ||||
-rw-r--r-- | util/charset/wide.cpp | 344 | ||||
-rw-r--r-- | util/charset/wide.h | 240 | ||||
-rw-r--r-- | util/charset/wide_sse41.cpp | 352 | ||||
-rw-r--r-- | util/charset/wide_ut.cpp | 2 |
8 files changed, 525 insertions, 525 deletions
diff --git a/util/charset/benchmark/to_lower/main.cpp b/util/charset/benchmark/to_lower/main.cpp index e95fdc2371..56599b1770 100644 --- a/util/charset/benchmark/to_lower/main.cpp +++ b/util/charset/benchmark/to_lower/main.cpp @@ -1,4 +1,4 @@ -#include <library/cpp/testing/benchmark/bench.h> +#include <library/cpp/testing/benchmark/bench.h> #include <util/charset/wide.h> #include <util/generic/singleton.h> diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp index 09fa567fe5..3a56c34361 100644 --- a/util/charset/benchmark/utf8_to_wide/main.cpp +++ b/util/charset/benchmark/utf8_to_wide/main.cpp @@ -1,4 +1,4 @@ -#include <library/cpp/testing/benchmark/bench.h> +#include <library/cpp/testing/benchmark/bench.h> #include <util/random/fast.h> #include <util/random/random.h> @@ -103,59 +103,59 @@ inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, s return UTF8ToWideImpl(text, len, dest, written); } -static wchar16 WBUF_UTF16[10000000]; -static wchar32 WBUF_UTF32[10000000]; +static wchar16 WBUF_UTF16[10000000]; +static wchar32 WBUF_UTF32[10000000]; -#define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \ - Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \ - const auto& data = *Singleton<RAS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - size_t written = 0; \ - Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ - } \ +#define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \ + Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \ + const auto& data = *Singleton<RAS##length>(); \ + for (size_t x = 0; x < iface.Iterations(); ++x) { \ + size_t written = 0; \ + Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ + } \ } -#define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \ - Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \ - const auto& data = *Singleton<RRS##length>(); \ - for (size_t x = 0; x < iface.Iterations(); ++x) { \ - size_t written = 0; \ - Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ - } \ +#define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \ + Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \ + const auto& data = *Singleton<RRS##length>(); \ + for (size_t x = 0; x < iface.Iterations(); ++x) { \ + size_t written = 0; \ + Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ + } \ } -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF16); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF16); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF32); - -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF32); -UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16); + +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF16); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF16); + +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 10, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF32); + +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 10, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1000000, UTF32); +UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1000000, UTF32); diff --git a/util/charset/utf8.h b/util/charset/utf8.h index 5039b46ae9..08499ff77f 100644 --- a/util/charset/utf8.h +++ b/util/charset/utf8.h @@ -194,7 +194,7 @@ inline RECODE_RESULT SafeReadUTF8Char(wchar32& rune, size_t& rune_len, const uns //! @param c value of the current character //! @param p pointer to the current character, it will be changed in case of valid UTF8 byte sequence //! @param e the end of the character sequence -Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigned char*& p, const unsigned char* e) noexcept { +Y_FORCE_INLINE RECODE_RESULT ReadUTF8CharAndAdvance(wchar32& rune, const unsigned char*& p, const unsigned char* e) noexcept { Y_ASSERT(p < e); // since p < e then we will check RECODE_EOINPUT only for n > 1 (see calls of this functions) switch (UTF8RuneLen(*p)) { case 0: diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp index 9e68881cca..7f50134624 100644 --- a/util/charset/utf8_ut.cpp +++ b/util/charset/utf8_ut.cpp @@ -4,8 +4,8 @@ #include <util/stream/file.h> #include <util/ysaveload.h> -#include <library/cpp/testing/unittest/registar.h> -#include <library/cpp/testing/unittest/env.h> +#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/env.h> Y_UNIT_TEST_SUITE(TUtfUtilTest) { Y_UNIT_TEST(TestUTF8Len) { diff --git a/util/charset/wide.cpp b/util/charset/wide.cpp index a287438ddd..7e96349631 100644 --- a/util/charset/wide.cpp +++ b/util/charset/wide.cpp @@ -146,8 +146,8 @@ bool IsTitleWord(const TWtringBuf text) noexcept { return IsLowerWord({p, pe}); } -template <bool stopOnFirstModification, typename TCharType, typename F> -static bool ModifySequence(TCharType*& p, const TCharType* const pe, F&& f) { +template <bool stopOnFirstModification, typename TCharType, typename F> +static bool ModifySequence(TCharType*& p, const TCharType* const pe, F&& f) { while (p != pe) { const auto symbol = ReadSymbol(p, pe); const auto modified = f(symbol); @@ -165,8 +165,8 @@ static bool ModifySequence(TCharType*& p, const TCharType* const pe, F&& f) { return false; } -template <bool stopOnFirstModification, typename TCharType, typename F> -static bool ModifySequence(const TCharType*& p, const TCharType* const pe, TCharType*& out, F&& f) { +template <bool stopOnFirstModification, typename TCharType, typename F> +static bool ModifySequence(const TCharType*& p, const TCharType* const pe, TCharType*& out, F&& f) { while (p != pe) { const auto symbol = stopOnFirstModification ? ReadSymbol(p, pe) : ReadSymbolAndAdvance(p, pe); const auto modified = f(symbol); @@ -193,8 +193,8 @@ static void DetachAndFixPointers(TStringType& text, typename TStringType::value_ pe = p + count; } -template <class TStringType, typename F> -static bool ModifyStringSymbolwise(TStringType& text, size_t pos, size_t count, F&& f) { +template <class TStringType, typename F> +static bool ModifyStringSymbolwise(TStringType& text, size_t pos, size_t count, F&& f) { // TODO(yazevnul): this is done for consistency with `TUtf16String::to_lower` and friends // at r2914050, maybe worth replacing them with asserts. Also see the same code in `ToTitle`. pos = pos < text.size() ? pos : text.size(); @@ -225,16 +225,16 @@ bool ToUpper(TUtf16String& text, size_t pos, size_t count) { return ModifyStringSymbolwise(text, pos, count, f); } -bool ToLower(TUtf32String& text, size_t pos, size_t count) { +bool ToLower(TUtf32String& text, size_t pos, size_t count) { const auto f = [](const wchar32 s) { return ToLower(s); }; - return ModifyStringSymbolwise(text, pos, count, f); -} - -bool ToUpper(TUtf32String& text, size_t pos, size_t count) { + return ModifyStringSymbolwise(text, pos, count, f); +} + +bool ToUpper(TUtf32String& text, size_t pos, size_t count) { const auto f = [](const wchar32 s) { return ToUpper(s); }; - return ModifyStringSymbolwise(text, pos, count, f); -} - + return ModifyStringSymbolwise(text, pos, count, f); +} + bool ToTitle(TUtf16String& text, size_t pos, size_t count) { if (!text) { return false; @@ -266,37 +266,37 @@ bool ToTitle(TUtf16String& text, size_t pos, size_t count) { return false; } -bool ToTitle(TUtf32String& text, size_t pos, size_t count) { - if (!text) { - return false; - } - - pos = pos < text.size() ? pos : text.size(); - count = count < text.size() - pos ? count : text.size() - pos; - +bool ToTitle(TUtf32String& text, size_t pos, size_t count) { + if (!text) { + return false; + } + + pos = pos < text.size() ? pos : text.size(); + count = count < text.size() - pos ? count : text.size() - pos; + const auto toLower = [](const wchar32 s) { return ToLower(s); }; - - auto* p = const_cast<wchar32*>(text.data() + pos); - const auto* pe = text.data() + pos + count; - - const auto firstSymbol = *p; - if (firstSymbol == ToTitle(firstSymbol)) { - p += 1; - if (ModifySequence<true>(p, pe, toLower)) { - DetachAndFixPointers(text, p, pe); - ModifySequence<false>(p, pe, toLower); - return true; - } - } else { - DetachAndFixPointers(text, p, pe); - WriteSymbol(ToTitle(ReadSymbol(p, pe)), p); // also moves `p` forward - ModifySequence<false>(p, pe, toLower); - return true; - } - - return false; -} - + + auto* p = const_cast<wchar32*>(text.data() + pos); + const auto* pe = text.data() + pos + count; + + const auto firstSymbol = *p; + if (firstSymbol == ToTitle(firstSymbol)) { + p += 1; + if (ModifySequence<true>(p, pe, toLower)) { + DetachAndFixPointers(text, p, pe); + ModifySequence<false>(p, pe, toLower); + return true; + } + } else { + DetachAndFixPointers(text, p, pe); + WriteSymbol(ToTitle(ReadSymbol(p, pe)), p); // also moves `p` forward + ModifySequence<false>(p, pe, toLower); + return true; + } + + return false; +} + TUtf16String ToLowerRet(TUtf16String text, size_t pos, size_t count) { ToLower(text, pos, count); return text; @@ -312,21 +312,21 @@ TUtf16String ToTitleRet(TUtf16String text, size_t pos, size_t count) { return text; } -TUtf32String ToLowerRet(TUtf32String text, size_t pos, size_t count) { - ToLower(text, pos, count); - return text; -} - -TUtf32String ToUpperRet(TUtf32String text, size_t pos, size_t count) { - ToUpper(text, pos, count); - return text; -} - -TUtf32String ToTitleRet(TUtf32String text, size_t pos, size_t count) { - ToTitle(text, pos, count); - return text; -} - +TUtf32String ToLowerRet(TUtf32String text, size_t pos, size_t count) { + ToLower(text, pos, count); + return text; +} + +TUtf32String ToUpperRet(TUtf32String text, size_t pos, size_t count) { + ToUpper(text, pos, count); + return text; +} + +TUtf32String ToTitleRet(TUtf32String text, size_t pos, size_t count) { + ToTitle(text, pos, count); + return text; +} + bool ToLower(const wchar16* text, size_t length, wchar16* out) noexcept { // TODO(yazevnul): get rid of `text == out` case (it is probably used only in lemmer) and then // we can declare text and out as `__restrict__` @@ -408,87 +408,87 @@ bool ToTitle(wchar16* text, size_t length) noexcept { return ToLower(text, textEnd - text) || firstSymbol != firstSymbolTitle; } -bool ToLower(const wchar32* text, size_t length, wchar32* out) noexcept { - // TODO(yazevnul): get rid of `text == out` case (it is probably used only in lemmer) and then - // we can declare text and out as `__restrict__` - Y_ASSERT(text == out || !(out >= text && out < text + length)); +bool ToLower(const wchar32* text, size_t length, wchar32* out) noexcept { + // TODO(yazevnul): get rid of `text == out` case (it is probably used only in lemmer) and then + // we can declare text and out as `__restrict__` + Y_ASSERT(text == out || !(out >= text && out < text + length)); const auto f = [](const wchar32 s) { return ToLower(s); }; - const auto* p = text; - const auto* const pe = text + length; - if (ModifySequence<true>(p, pe, out, f)) { - ModifySequence<false>(p, pe, out, f); - return true; - } - return false; -} - -bool ToUpper(const wchar32* text, size_t length, wchar32* out) noexcept { - Y_ASSERT(text == out || !(out >= text && out < text + length)); + const auto* p = text; + const auto* const pe = text + length; + if (ModifySequence<true>(p, pe, out, f)) { + ModifySequence<false>(p, pe, out, f); + return true; + } + return false; +} + +bool ToUpper(const wchar32* text, size_t length, wchar32* out) noexcept { + Y_ASSERT(text == out || !(out >= text && out < text + length)); const auto f = [](const wchar32 s) { return ToUpper(s); }; - const auto* p = text; - const auto* const pe = text + length; - if (ModifySequence<true>(p, pe, out, f)) { - ModifySequence<false>(p, pe, out, f); - return true; - } - return false; -} - -bool ToTitle(const wchar32* text, size_t length, wchar32* out) noexcept { - if (!length) { - return false; - } - - Y_ASSERT(text == out || !(out >= text && out < text + length)); - - const auto* const textEnd = text + length; - const auto firstSymbol = ReadSymbolAndAdvance(text, textEnd); - const auto firstSymbolTitle = ToTitle(firstSymbol); - - WriteSymbol(firstSymbolTitle, out); - - return ToLower(text, textEnd - text, out) || firstSymbol != firstSymbolTitle; -} - -bool ToLower(wchar32* text, size_t length) noexcept { + const auto* p = text; + const auto* const pe = text + length; + if (ModifySequence<true>(p, pe, out, f)) { + ModifySequence<false>(p, pe, out, f); + return true; + } + return false; +} + +bool ToTitle(const wchar32* text, size_t length, wchar32* out) noexcept { + if (!length) { + return false; + } + + Y_ASSERT(text == out || !(out >= text && out < text + length)); + + const auto* const textEnd = text + length; + const auto firstSymbol = ReadSymbolAndAdvance(text, textEnd); + const auto firstSymbolTitle = ToTitle(firstSymbol); + + WriteSymbol(firstSymbolTitle, out); + + return ToLower(text, textEnd - text, out) || firstSymbol != firstSymbolTitle; +} + +bool ToLower(wchar32* text, size_t length) noexcept { const auto f = [](const wchar32 s) { return ToLower(s); }; - const auto* const textEnd = text + length; - if (ModifySequence<true>(text, textEnd, f)) { - ModifySequence<false>(text, textEnd, f); - return true; - } - return false; -} - -bool ToUpper(wchar32* text, size_t length) noexcept { + const auto* const textEnd = text + length; + if (ModifySequence<true>(text, textEnd, f)) { + ModifySequence<false>(text, textEnd, f); + return true; + } + return false; +} + +bool ToUpper(wchar32* text, size_t length) noexcept { const auto f = [](const wchar32 s) { return ToUpper(s); }; - const auto* const textEnd = text + length; - if (ModifySequence<true>(text, textEnd, f)) { - ModifySequence<false>(text, textEnd, f); - return true; - } - return false; -} - -bool ToTitle(wchar32* text, size_t length) noexcept { - if (!length) { - return false; - } - - const auto* textEnd = text + length; - const auto firstSymbol = ReadSymbol(text, textEnd); - const auto firstSymbolTitle = ToTitle(firstSymbol); - - // avoid unnacessary writes to the memory - if (firstSymbol != firstSymbolTitle) { - WriteSymbol(firstSymbolTitle, text); - } else { - text = SkipSymbol(text, textEnd); - } - - return ToLower(text, textEnd - text) || firstSymbol != firstSymbolTitle; -} - + const auto* const textEnd = text + length; + if (ModifySequence<true>(text, textEnd, f)) { + ModifySequence<false>(text, textEnd, f); + return true; + } + return false; +} + +bool ToTitle(wchar32* text, size_t length) noexcept { + if (!length) { + return false; + } + + const auto* textEnd = text + length; + const auto firstSymbol = ReadSymbol(text, textEnd); + const auto firstSymbolTitle = ToTitle(firstSymbol); + + // avoid unnacessary writes to the memory + if (firstSymbol != firstSymbolTitle) { + WriteSymbol(firstSymbolTitle, text); + } else { + text = SkipSymbol(text, textEnd); + } + + return ToLower(text, textEnd - text) || firstSymbol != firstSymbolTitle; +} + template <typename F> static TUtf16String ToSmthRet(const TWtringBuf text, size_t pos, size_t count, F&& f) { pos = pos < text.size() ? pos : text.size(); @@ -510,27 +510,27 @@ static TUtf16String ToSmthRet(const TWtringBuf text, size_t pos, size_t count, F return res; } -template <typename F> -static TUtf32String ToSmthRet(const TUtf32StringBuf text, size_t pos, size_t count, F&& f) { - pos = pos < text.size() ? pos : text.size(); - count = count < text.size() - pos ? count : text.size() - pos; - - auto res = TUtf32String::Uninitialized(text.size()); - auto* const resBegin = res.Detach(); - - if (pos) { - MemCopy(resBegin, text.data(), pos); - } - - f(text.data() + pos, count, resBegin + pos); - - if (count - pos != text.size()) { - MemCopy(resBegin + pos + count, text.data() + pos + count, text.size() - pos - count); - } - - return res; -} - +template <typename F> +static TUtf32String ToSmthRet(const TUtf32StringBuf text, size_t pos, size_t count, F&& f) { + pos = pos < text.size() ? pos : text.size(); + count = count < text.size() - pos ? count : text.size() - pos; + + auto res = TUtf32String::Uninitialized(text.size()); + auto* const resBegin = res.Detach(); + + if (pos) { + MemCopy(resBegin, text.data(), pos); + } + + f(text.data() + pos, count, resBegin + pos); + + if (count - pos != text.size()) { + MemCopy(resBegin + pos + count, text.data() + pos + count, text.size() - pos - count); + } + + return res; +} + TUtf16String ToLowerRet(const TWtringBuf text, size_t pos, size_t count) { return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) { ToLower(theText, length, out); @@ -549,24 +549,24 @@ TUtf16String ToTitleRet(const TWtringBuf text, size_t pos, size_t count) { }); } -TUtf32String ToLowerRet(const TUtf32StringBuf text, size_t pos, size_t count) { +TUtf32String ToLowerRet(const TUtf32StringBuf text, size_t pos, size_t count) { return ToSmthRet(text, pos, count, [](const wchar32* theText, size_t length, wchar32* out) { - ToLower(theText, length, out); - }); -} - -TUtf32String ToUpperRet(const TUtf32StringBuf text, size_t pos, size_t count) { + ToLower(theText, length, out); + }); +} + +TUtf32String ToUpperRet(const TUtf32StringBuf text, size_t pos, size_t count) { return ToSmthRet(text, pos, count, [](const wchar32* theText, size_t length, wchar32* out) { - ToUpper(theText, length, out); - }); -} - -TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos, size_t count) { + ToUpper(theText, length, out); + }); +} + +TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos, size_t count) { return ToSmthRet(text, pos, count, [](const wchar32* theText, size_t length, wchar32* out) { - ToTitle(theText, length, out); - }); -} - + ToTitle(theText, length, out); + }); +} + template <bool insertBr> void EscapeHtmlChars(TUtf16String& str) { static const TUtf16String lt(LT, Y_ARRAY_SIZE(LT)); diff --git a/util/charset/wide.h b/util/charset/wide.h index 04e6928aab..5a566983fa 100644 --- a/util/charset/wide.h +++ b/util/charset/wide.h @@ -34,20 +34,20 @@ namespace NDetail { template <> struct TSelector<false> { template <class T> - static inline void WriteSymbol(wchar16 s, T& dest) noexcept { + static inline void WriteSymbol(wchar16 s, T& dest) noexcept { dest.push_back(s); } }; - + template <> struct TSelector<true> { template <class T> - static inline void WriteSymbol(wchar16 s, T& dest) noexcept { + static inline void WriteSymbol(wchar16 s, T& dest) noexcept { *(dest++) = s; } }; - inline wchar32 ReadSurrogatePair(const wchar16* chars) noexcept { + inline wchar32 ReadSurrogatePair(const wchar16* chars) noexcept { const wchar32 SURROGATE_OFFSET = static_cast<wchar32>(0x10000 - (0xD800 << 10) - 0xDC00); wchar16 lead = chars[0]; wchar16 tail = chars[1]; @@ -59,26 +59,26 @@ namespace NDetail { } template <class T> - inline void WriteSurrogatePair(wchar32 s, T& dest) noexcept; + inline void WriteSurrogatePair(wchar32 s, T& dest) noexcept; } -inline wchar16* SkipSymbol(wchar16* begin, const wchar16* end) noexcept { +inline wchar16* SkipSymbol(wchar16* begin, const wchar16* end) noexcept { return begin + W16SymbolSize(begin, end); } -inline const wchar16* SkipSymbol(const wchar16* begin, const wchar16* end) noexcept { +inline const wchar16* SkipSymbol(const wchar16* begin, const wchar16* end) noexcept { return begin + W16SymbolSize(begin, end); } -inline wchar32* SkipSymbol(wchar32* begin, const wchar32* end) noexcept { - Y_ASSERT(begin < end); - return begin + 1; -} -inline const wchar32* SkipSymbol(const wchar32* begin, const wchar32* end) noexcept { - Y_ASSERT(begin < end); - return begin + 1; -} +inline wchar32* SkipSymbol(wchar32* begin, const wchar32* end) noexcept { + Y_ASSERT(begin < end); + return begin + 1; +} +inline const wchar32* SkipSymbol(const wchar32* begin, const wchar32* end) noexcept { + Y_ASSERT(begin < end); + return begin + 1; +} -inline wchar32 ReadSymbol(const wchar16* begin, const wchar16* end) noexcept { +inline wchar32 ReadSymbol(const wchar16* begin, const wchar16* end) noexcept { Y_ASSERT(begin < end); if (IsW16SurrogateLead(*begin)) { if (begin + 1 < end && IsW16SurrogateTail(*(begin + 1))) @@ -92,13 +92,13 @@ inline wchar32 ReadSymbol(const wchar16* begin, const wchar16* end) noexcept { return *begin; } -inline wchar32 ReadSymbol(const wchar32* begin, const wchar32* end) noexcept { - Y_ASSERT(begin < end); - return *begin; -} - +inline wchar32 ReadSymbol(const wchar32* begin, const wchar32* end) noexcept { + Y_ASSERT(begin < end); + return *begin; +} + //! presuming input data is either big enought of null terminated -inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin) noexcept { +inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin) noexcept { Y_ASSERT(*begin); if (IsW16SurrogateLead(begin[0])) { if (IsW16SurrogateTail(begin[1])) { @@ -116,13 +116,13 @@ inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin) noexcept { return *(begin++); } -//! presuming input data is either big enought of null terminated -inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin) noexcept { - Y_ASSERT(*begin); - return *(begin++); -} - -inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin, const wchar16* end) noexcept { +//! presuming input data is either big enought of null terminated +inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin) noexcept { + Y_ASSERT(*begin); + return *(begin++); +} + +inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin, const wchar16* end) noexcept { Y_ASSERT(begin < end); if (IsW16SurrogateLead(begin[0])) { if (begin + 1 != end && IsW16SurrogateTail(begin[1])) { @@ -139,19 +139,19 @@ inline wchar32 ReadSymbolAndAdvance(const wchar16*& begin, const wchar16* end) n return *(begin++); } -inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin, const wchar32* end) noexcept { +inline wchar32 ReadSymbolAndAdvance(const wchar32*& begin, const wchar32* end) noexcept { Y_ASSERT(begin < end); return *(begin++); } template <class T> -inline size_t WriteSymbol(wchar16 s, T& dest) noexcept { +inline size_t WriteSymbol(wchar16 s, T& dest) noexcept { ::NDetail::TSelector<std::is_pointer<T>::value>::WriteSymbol(s, dest); return 1; } template <class T> -inline size_t WriteSymbol(wchar32 s, T& dest) noexcept { +inline size_t WriteSymbol(wchar32 s, T& dest) noexcept { if (s > 0xFFFF) { if (s >= ::NUnicode::UnicodeInstancesLimit()) { return WriteSymbol(static_cast<wchar16>(BROKEN_RUNE), dest); @@ -164,7 +164,7 @@ inline size_t WriteSymbol(wchar32 s, T& dest) noexcept { return WriteSymbol(static_cast<wchar16>(s), dest); } -inline bool WriteSymbol(wchar32 s, wchar16*& dest, const wchar16* destEnd) noexcept { +inline bool WriteSymbol(wchar32 s, wchar16*& dest, const wchar16* destEnd) noexcept { Y_ASSERT(dest < destEnd); if (s > 0xFFFF) { @@ -184,12 +184,12 @@ inline bool WriteSymbol(wchar32 s, wchar16*& dest, const wchar16* destEnd) noexc return true; } -inline size_t WriteSymbol(wchar32 s, wchar32*& dest) noexcept { +inline size_t WriteSymbol(wchar32 s, wchar32*& dest) noexcept { *(dest++) = s; return 1; } -inline bool WriteSymbol(wchar32 s, wchar32*& dest, const wchar32* destEnd) noexcept { +inline bool WriteSymbol(wchar32 s, wchar32*& dest, const wchar32* destEnd) noexcept { Y_ASSERT(dest < destEnd); *(dest++) = s; @@ -260,7 +260,7 @@ public: namespace NDetail { template <bool robust, typename TCharType> - inline void UTF8ToWideImplScalar(const unsigned char*& cur, const unsigned char* last, TCharType*& dest) noexcept { + inline void UTF8ToWideImplScalar(const unsigned char*& cur, const unsigned char* last, TCharType*& dest) noexcept { wchar32 rune = BROKEN_RUNE; while (cur != last) { @@ -278,29 +278,29 @@ namespace NDetail { } } - template <typename TCharType> - inline void UTF16ToUTF32ImplScalar(const wchar16* cur, const wchar16* last, TCharType*& dest) noexcept { - wchar32 rune = BROKEN_RUNE; - - while (cur != last) { - rune = ReadSymbolAndAdvance(cur, last); - Y_ASSERT(cur <= last); - WriteSymbol(rune, dest); - } - } - + template <typename TCharType> + inline void UTF16ToUTF32ImplScalar(const wchar16* cur, const wchar16* last, TCharType*& dest) noexcept { + wchar32 rune = BROKEN_RUNE; + + while (cur != last) { + rune = ReadSymbolAndAdvance(cur, last); + Y_ASSERT(cur <= last); + WriteSymbol(rune, dest); + } + } + template <class TCharType> inline void UTF8ToWideImplSSE41(const unsigned char*& /*cur*/, const unsigned char* /*last*/, TCharType*& /*dest*/) noexcept { } void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar16*& dest) noexcept; - - void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept; + + void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept; } //! @return len if robust and position where encoding stopped if not template <bool robust, typename TCharType> -inline size_t UTF8ToWideImpl(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { +inline size_t UTF8ToWideImpl(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); const unsigned char* last = cur + len; TCharType* p = dest; @@ -333,7 +333,7 @@ inline TUtf16String UTF8ToWide(const char* text, size_t len) { } template <bool robust, typename TCharType> -inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { +inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { return UTF8ToWideImpl<robust>(text, len, dest, written) == len; } @@ -342,7 +342,7 @@ inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& wr //! conversion stops if a broken symbol is met //! @return @c true if all the text converted successfully, @c false - a broken symbol was found template <typename TCharType> -inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { +inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { return UTF8ToWide<false>(text, len, dest, written); } @@ -350,21 +350,21 @@ template <bool robust> inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) { dst.ReserveAndResize(src.size()); size_t written = 0; - UTF8ToWideImpl<robust>(src.data(), src.size(), dst.begin(), written); - dst.resize(written); - return dst; -} - -//! if not robust will stop at first error position -template <bool robust> -inline TUtf32StringBuf UTF8ToUTF32(const TStringBuf src, TUtf32String& dst) { - dst.ReserveAndResize(src.size()); - size_t written = 0; - UTF8ToWideImpl<robust>(src.data(), src.size(), dst.begin(), written); + UTF8ToWideImpl<robust>(src.data(), src.size(), dst.begin(), written); dst.resize(written); return dst; } +//! if not robust will stop at first error position +template <bool robust> +inline TUtf32StringBuf UTF8ToUTF32(const TStringBuf src, TUtf32String& dst) { + dst.ReserveAndResize(src.size()); + size_t written = 0; + UTF8ToWideImpl<robust>(src.data(), src.size(), dst.begin(), written); + dst.resize(written); + return dst; +} + inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) { return UTF8ToWide<false>(src, dst); } @@ -378,13 +378,13 @@ inline TUtf16String UTF8ToWide(const TStringBuf s) { return UTF8ToWide<robust>(s.data(), s.size()); } -template <bool robust> -inline TUtf32String UTF8ToUTF32(const TStringBuf s) { - TUtf32String r; - UTF8ToUTF32<robust>(s, r); - return r; -} - +template <bool robust> +inline TUtf32String UTF8ToUTF32(const TStringBuf s) { + TUtf32String r; + UTF8ToUTF32<robust>(s, r); + return r; +} + inline TUtf16String UTF8ToWide(const TStringBuf s) { return UTF8ToWide<false>(s.data(), s.size()); } @@ -428,23 +428,23 @@ inline TString WideToUTF8(const wchar16* text, size_t len) { return s; } -inline TString WideToUTF8(const wchar32* text, size_t len) { - TString s = TString::Uninitialized(WideToUTF8BufferSize(len)); - size_t written = 0; - WideToUTF8(text, len, s.begin(), written); - Y_ASSERT(s.size() >= written); - s.remove(written); - return s; -} - +inline TString WideToUTF8(const wchar32* text, size_t len) { + TString s = TString::Uninitialized(WideToUTF8BufferSize(len)); + size_t written = 0; + WideToUTF8(text, len, s.begin(), written); + Y_ASSERT(s.size() >= written); + s.remove(written); + return s; +} + inline TString WideToUTF8(const TWtringBuf w) { return WideToUTF8(w.data(), w.size()); } -inline TString WideToUTF8(const TUtf32StringBuf w) { - return WideToUTF8(w.data(), w.size()); -} - +inline TString WideToUTF8(const TUtf32StringBuf w) { + return WideToUTF8(w.data(), w.size()); +} + inline TUtf16String UTF32ToWide(const wchar32* begin, size_t len) { TUtf16String res; res.reserve(len); @@ -653,11 +653,11 @@ inline TUtf16String ASCIIToWide(const TStringBuf s) { return CopyTo<TUtf16String>(s.begin(), s.end()); } -inline TUtf32String ASCIIToUTF32(const TStringBuf s) { - Y_ASSERT(IsStringASCII(s.begin(), s.end())); - return CopyTo<TUtf32String>(s.begin(), s.end()); -} - +inline TUtf32String ASCIIToUTF32(const TStringBuf s) { + Y_ASSERT(IsStringASCII(s.begin(), s.end())); + return CopyTo<TUtf32String>(s.begin(), s.end()); +} + //! returns @c true if string contains whitespace characters only inline bool IsSpace(const wchar16* s, size_t n) { if (n == 0) @@ -739,30 +739,30 @@ bool IsUpper(const TWtringBuf text) noexcept; bool ToLower(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); bool ToUpper(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); -/* Lowercase/uppercase given string inplace. Any alphabetic symbol will be converted to a proper -* case, the rest of the symbols will be kept the same. It is expected that `text` is a correct -* UTF-32 string. -* -* For example `ToLower("heLLo")` will return `"hello"`. -* -* @param text String to modify -* @param pos Position of the first character to modify -* @param count Length of the substring -* @returns `true` if `text` was changed -* -* NOTE: `pos` and `count` are measured in `wchar16`, not in codepoints. -*/ -bool ToLower(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); -bool ToUpper(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); - +/* Lowercase/uppercase given string inplace. Any alphabetic symbol will be converted to a proper +* case, the rest of the symbols will be kept the same. It is expected that `text` is a correct +* UTF-32 string. +* +* For example `ToLower("heLLo")` will return `"hello"`. +* +* @param text String to modify +* @param pos Position of the first character to modify +* @param count Length of the substring +* @returns `true` if `text` was changed +* +* NOTE: `pos` and `count` are measured in `wchar16`, not in codepoints. +*/ +bool ToLower(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); +bool ToUpper(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); + /* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. */ bool ToTitle(TUtf16String& text, size_t pos = 0, size_t count = TUtf16String::npos); -/* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. -*/ -bool ToTitle(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); - +/* Titlecase first symbol and lowercase the rest, see `ToLower` for more details. +*/ +bool ToTitle(TUtf32String& /*text*/, size_t /*pos*/ = 0, size_t /*count*/ = TUtf16String::npos); + /* @param text Pointer to the string to modify * @param length Length of the string to modify * @param out Pointer to the character array to write to @@ -776,10 +776,10 @@ bool ToLower(const wchar16* text, size_t length, wchar16* out) noexcept; bool ToUpper(const wchar16* text, size_t length, wchar16* out) noexcept; bool ToTitle(const wchar16* text, size_t length, wchar16* out) noexcept; -bool ToLower(const wchar32* text, size_t length, wchar32* out) noexcept; -bool ToUpper(const wchar32* text, size_t length, wchar32* out) noexcept; -bool ToTitle(const wchar32* text, size_t length, wchar32* out) noexcept; - +bool ToLower(const wchar32* text, size_t length, wchar32* out) noexcept; +bool ToUpper(const wchar32* text, size_t length, wchar32* out) noexcept; +bool ToTitle(const wchar32* text, size_t length, wchar32* out) noexcept; + /* @param text Pointer to the string to modify * @param length Length of the string to modify * @@ -789,10 +789,10 @@ bool ToLower(wchar16* text, size_t length) noexcept; bool ToUpper(wchar16* text, size_t length) noexcept; bool ToTitle(wchar16* text, size_t length) noexcept; -bool ToLower(wchar32* text, size_t length) noexcept; -bool ToUpper(wchar32* text, size_t length) noexcept; -bool ToTitle(wchar32* text, size_t length) noexcept; - +bool ToLower(wchar32* text, size_t length) noexcept; +bool ToUpper(wchar32* text, size_t length) noexcept; +bool ToTitle(wchar32* text, size_t length) noexcept; + /* Convenience wrappers for `ToLower`, `ToUpper` and `ToTitle`. */ TUtf16String ToLowerRet(TUtf16String text, size_t pos = 0, size_t count = TUtf16String::npos) Y_WARN_UNUSED_RESULT; @@ -803,10 +803,10 @@ TUtf16String ToLowerRet(const TWtringBuf text, size_t pos = 0, size_t count = TW TUtf16String ToUpperRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; TUtf16String ToTitleRet(const TWtringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; -TUtf32String ToLowerRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; -TUtf32String ToUpperRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; -TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; - +TUtf32String ToLowerRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; +TUtf32String ToUpperRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; +TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos = 0, size_t count = TWtringBuf::npos) Y_WARN_UNUSED_RESULT; + //! replaces the '<', '>' and '&' characters in string with '<', '>' and '&' respectively // insertBr=true - replace '\r' and '\n' with "<BR>" template <bool insertBr> diff --git a/util/charset/wide_sse41.cpp b/util/charset/wide_sse41.cpp index d1f2a74851..a4c0982f56 100644 --- a/util/charset/wide_sse41.cpp +++ b/util/charset/wide_sse41.cpp @@ -21,226 +21,226 @@ namespace NDetail { //processes to the first error, or until less then 16 bytes left //most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html -//return dstAdvance 0 in case of problems -static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned char*& cur, __m128i& utf16Low, __m128i& utf16High) { - unsigned char curAligned[16]; - - memcpy(curAligned, cur, sizeof(__m128i)); - __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i*>(curAligned)); - - //only ascii characters - simple copy - if (!_mm_movemask_epi8(chunk)) { - utf16Low = _mm_unpacklo_epi8(chunk, _mm_setzero_si128()); - utf16High = _mm_unpackhi_epi8(chunk, _mm_setzero_si128()); - cur += 16; - return 16; - } +//return dstAdvance 0 in case of problems +static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned char*& cur, __m128i& utf16Low, __m128i& utf16High) { + unsigned char curAligned[16]; + + memcpy(curAligned, cur, sizeof(__m128i)); + __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i*>(curAligned)); + + //only ascii characters - simple copy + if (!_mm_movemask_epi8(chunk)) { + utf16Low = _mm_unpacklo_epi8(chunk, _mm_setzero_si128()); + utf16High = _mm_unpackhi_epi8(chunk, _mm_setzero_si128()); + cur += 16; + return 16; + } - __m128i chunkSigned = _mm_add_epi8(chunk, _mm_set1_epi8(0x80)); - __m128i isAsciiMask = _mm_cmpgt_epi8(chunk, _mm_set1_epi8(0)); + __m128i chunkSigned = _mm_add_epi8(chunk, _mm_set1_epi8(0x80)); + __m128i isAsciiMask = _mm_cmpgt_epi8(chunk, _mm_set1_epi8(0)); - __m128i cond2 = _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunkSigned); + __m128i cond2 = _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunkSigned); __m128i state = _mm_set1_epi8(0x0 | (char)0x80); - __m128i cond3 = _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunkSigned); + __m128i cond3 = _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunkSigned); state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2); - int sourceAdvance; - __m128i shifts; - __m128i chunkLow, chunkHigh; + int sourceAdvance; + __m128i shifts; + __m128i chunkLow, chunkHigh; - if (Y_LIKELY(!_mm_movemask_epi8(cond3))) { - //main case: no bloks of size 3 or 4 + if (Y_LIKELY(!_mm_movemask_epi8(cond3))) { + //main case: no bloks of size 3 or 4 - //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.) - __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7)); + //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.) + __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7)); - __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1)); + __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1)); - shifts = countSub1; - __m128i continuation1 = _mm_slli_si128(countSub1, 1); + shifts = countSub1; + __m128i continuation1 = _mm_slli_si128(countSub1, 1); - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1)); - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2)); + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1)); + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2)); - __m128i counts = _mm_or_si128(count, continuation1); + __m128i counts = _mm_or_si128(count, continuation1); - __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0)); - __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuation1, _mm_set1_epi8(0)); - __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask); - //each symbol should be exactly one of ascii, continuation or begin - __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask); + __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0)); + __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuation1, _mm_set1_epi8(0)); + __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask); + //each symbol should be exactly one of ascii, continuation or begin + __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask); - if (_mm_movemask_epi8(okMask) != 0xFFFF) { - return 0; - } + if (_mm_movemask_epi8(okMask) != 0xFFFF) { + return 0; + } - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4)); + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4)); - __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8)); - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8)); + __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8)); + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8)); - chunk = _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits - shifts = _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1 + chunk = _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits + shifts = _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1 - __m128i chunk_right = _mm_slli_si128(chunk, 1); - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), + __m128i chunk_right = _mm_slli_si128(chunk, 1); + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); - chunkLow = _mm_blendv_epi8(chunk, + chunkLow = _mm_blendv_epi8(chunk, _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); - chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); + chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); - chunkHigh = _mm_srli_epi32(chunkHigh, 2); + chunkHigh = _mm_srli_epi32(chunkHigh, 2); - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); - - int c = _mm_extract_epi16(counts, 7); - sourceAdvance = !(c & 0x0200) ? 16 : 15; - - } else { - __m128i mask3 = _mm_slli_si128(cond3, 1); - - __m128i cond4 = _mm_cmplt_epi8(_mm_set1_epi8(0xf0 - 1 - 0x80), chunkSigned); + + int c = _mm_extract_epi16(counts, 7); + sourceAdvance = !(c & 0x0200) ? 16 : 15; + + } else { + __m128i mask3 = _mm_slli_si128(cond3, 1); + + __m128i cond4 = _mm_cmplt_epi8(_mm_set1_epi8(0xf0 - 1 - 0x80), chunkSigned); state = _mm_blendv_epi8(state, _mm_set1_epi8(0x3 | (char)0xe0), cond3); - - // 4 bytes sequences are not vectorize. Fall back to the scalar processing - if (Y_UNLIKELY(_mm_movemask_epi8(cond4))) { - return 0; - } - - //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.) - __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7)); - - __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1)); - __m128i continuation2 = _mm_slli_si128(_mm_subs_epu8(count, _mm_set1_epi8(0x2)), 2); - - shifts = countSub1; - __m128i continuation1 = _mm_slli_si128(countSub1, 1); - - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1)); - __m128i continuationsRunelen = _mm_or_si128(continuation1, continuation2); - - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2)); - __m128i counts = _mm_or_si128(count, continuationsRunelen); - - __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0)); - __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuationsRunelen, _mm_set1_epi8(0)); - __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask); - //each symbol should be exactly one of ascii, continuation or begin - __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask); - - if (_mm_movemask_epi8(okMask) != 0xFFFF) { - return 0; - } - - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4)); - - __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8)); - shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8)); - - chunk = _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits - shifts = _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1 - - __m128i chunk_right = _mm_slli_si128(chunk, 1); - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), + + // 4 bytes sequences are not vectorize. Fall back to the scalar processing + if (Y_UNLIKELY(_mm_movemask_epi8(cond4))) { + return 0; + } + + //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.) + __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7)); + + __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1)); + __m128i continuation2 = _mm_slli_si128(_mm_subs_epu8(count, _mm_set1_epi8(0x2)), 2); + + shifts = countSub1; + __m128i continuation1 = _mm_slli_si128(countSub1, 1); + + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1)); + __m128i continuationsRunelen = _mm_or_si128(continuation1, continuation2); + + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2)); + __m128i counts = _mm_or_si128(count, continuationsRunelen); + + __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0)); + __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuationsRunelen, _mm_set1_epi8(0)); + __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask); + //each symbol should be exactly one of ascii, continuation or begin + __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask); + + if (_mm_movemask_epi8(okMask) != 0xFFFF) { + return 0; + } + + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4)); + + __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8)); + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8)); + + chunk = _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits + shifts = _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1 + + __m128i chunk_right = _mm_slli_si128(chunk, 1); + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); - - chunkLow = _mm_blendv_epi8(chunk, + + chunkLow = _mm_blendv_epi8(chunk, _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); - - chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); - - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), + + chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); + + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); - chunkHigh = _mm_srli_epi32(chunkHigh, 2); - - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), + chunkHigh = _mm_srli_epi32(chunkHigh, 2); + + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); - chunkHigh = _mm_or_si128(chunkHigh, + chunkHigh = _mm_or_si128(chunkHigh, _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)), mask3)); - int c = _mm_extract_epi16(counts, 7); + int c = _mm_extract_epi16(counts, 7); sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15 : 14; - } + } - shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8), + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8), _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8)); - chunkHigh = _mm_slli_si128(chunkHigh, 1); - - __m128i shuf = _mm_add_epi8(shifts, _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); - - chunkLow = _mm_shuffle_epi8(chunkLow, shuf); - chunkHigh = _mm_shuffle_epi8(chunkHigh, shuf); - - utf16Low = _mm_unpacklo_epi8(chunkLow, chunkHigh); - utf16High = _mm_unpackhi_epi8(chunkLow, chunkHigh); - - ui32 s = _mm_extract_epi32(shifts, 3); - ui32 destAdvance = sourceAdvance - (0xff & (s >> (8 * (3 - 16 + sourceAdvance)))); - cur += sourceAdvance; - return destAdvance; -} - -namespace NDetail { - void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar16*& dest) noexcept { - alignas(16) wchar16 destAligned[16]; - - while (cur + 16 <= last) { - __m128i utf16Low; - __m128i utf16High; - ui32 dstAdvance = Unpack16BytesIntoUtf16IfNoSurrogats(cur, utf16Low, utf16High); - - if (dstAdvance == 0) { - break; - } - - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned), utf16Low); - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 1, utf16High); + chunkHigh = _mm_slli_si128(chunkHigh, 1); + + __m128i shuf = _mm_add_epi8(shifts, _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + chunkLow = _mm_shuffle_epi8(chunkLow, shuf); + chunkHigh = _mm_shuffle_epi8(chunkHigh, shuf); + + utf16Low = _mm_unpacklo_epi8(chunkLow, chunkHigh); + utf16High = _mm_unpackhi_epi8(chunkLow, chunkHigh); + + ui32 s = _mm_extract_epi32(shifts, 3); + ui32 destAdvance = sourceAdvance - (0xff & (s >> (8 * (3 - 16 + sourceAdvance)))); + cur += sourceAdvance; + return destAdvance; +} + +namespace NDetail { + void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar16*& dest) noexcept { + alignas(16) wchar16 destAligned[16]; + + while (cur + 16 <= last) { + __m128i utf16Low; + __m128i utf16High; + ui32 dstAdvance = Unpack16BytesIntoUtf16IfNoSurrogats(cur, utf16Low, utf16High); + + if (dstAdvance == 0) { + break; + } + + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned), utf16Low); + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 1, utf16High); memcpy(dest, destAligned, sizeof(__m128i) * 2); - dest += dstAdvance; - } - //The rest will be handled sequencially. - // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence - } - - void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept { - alignas(16) wchar32 destAligned[16]; - - while (cur + 16 <= last) { - __m128i utf16Low; - __m128i utf16High; - ui32 dstAdvance = Unpack16BytesIntoUtf16IfNoSurrogats(cur, utf16Low, utf16High); - - if (dstAdvance == 0) { - break; - } - - //NOTE: we only work in case without surrogat pairs, so we can make simple copying with zeroes in 2 high bytes - __m128i utf32_lowlow = _mm_unpacklo_epi16(utf16Low, _mm_set1_epi8(0)); - __m128i utf32_lowhigh = _mm_unpackhi_epi16(utf16Low, _mm_set1_epi8(0)); - __m128i utf32_highlow = _mm_unpacklo_epi16(utf16High, _mm_set1_epi8(0)); - __m128i utf32_highhigh = _mm_unpackhi_epi16(utf16High, _mm_set1_epi8(0)); - - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned), utf32_lowlow); - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 1, utf32_lowhigh); - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 2, utf32_highlow); - _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 3, utf32_highhigh); - - memcpy(dest, destAligned, sizeof(__m128i) * 4); - dest += dstAdvance; + dest += dstAdvance; + } + //The rest will be handled sequencially. + // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence + } + + void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept { + alignas(16) wchar32 destAligned[16]; + + while (cur + 16 <= last) { + __m128i utf16Low; + __m128i utf16High; + ui32 dstAdvance = Unpack16BytesIntoUtf16IfNoSurrogats(cur, utf16Low, utf16High); + + if (dstAdvance == 0) { + break; + } + + //NOTE: we only work in case without surrogat pairs, so we can make simple copying with zeroes in 2 high bytes + __m128i utf32_lowlow = _mm_unpacklo_epi16(utf16Low, _mm_set1_epi8(0)); + __m128i utf32_lowhigh = _mm_unpackhi_epi16(utf16Low, _mm_set1_epi8(0)); + __m128i utf32_highlow = _mm_unpacklo_epi16(utf16High, _mm_set1_epi8(0)); + __m128i utf32_highhigh = _mm_unpackhi_epi16(utf16High, _mm_set1_epi8(0)); + + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned), utf32_lowlow); + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 1, utf32_lowhigh); + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 2, utf32_highlow); + _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 3, utf32_highhigh); + + memcpy(dest, destAligned, sizeof(__m128i) * 4); + dest += dstAdvance; } //The rest will be handled sequencially. - // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence + // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence } } diff --git a/util/charset/wide_ut.cpp b/util/charset/wide_ut.cpp index d8f3233e73..aa1a28f84f 100644 --- a/util/charset/wide_ut.cpp +++ b/util/charset/wide_ut.cpp @@ -1,7 +1,7 @@ #include "utf8.h" #include "wide.h" -#include <library/cpp/testing/unittest/registar.h> +#include <library/cpp/testing/unittest/registar.h> #include <util/string/reverse.h> |