diff options
author | mihaild <mihaild@yandex-team.ru> | 2022-02-10 16:46:59 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:59 +0300 |
commit | 55fa8c7df8dba9a6fda8a807e529a9d04bd88580 (patch) | |
tree | b83306b6e37edeea782e9eed673d89286c4fef35 /util/charset | |
parent | 246417ad6168d3f7ab4a0cf1c79ba4259f7c45ae (diff) | |
download | ydb-55fa8c7df8dba9a6fda8a807e529a9d04bd88580.tar.gz |
Restoring authorship annotation for <mihaild@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'util/charset')
-rw-r--r-- | util/charset/benchmark/utf8_to_wide/main.cpp | 96 | ||||
-rw-r--r-- | util/charset/generated/unidata.cpp | 12 | ||||
-rw-r--r-- | util/charset/unicode_table.h | 8 | ||||
-rw-r--r-- | util/charset/unidata.h | 6 | ||||
-rw-r--r-- | util/charset/utf8.h | 36 | ||||
-rw-r--r-- | util/charset/utf8_ut.cpp | 20 | ||||
-rw-r--r-- | util/charset/wide.h | 184 | ||||
-rw-r--r-- | util/charset/wide_sse41.cpp | 78 | ||||
-rw-r--r-- | util/charset/ya.make | 6 |
9 files changed, 223 insertions, 223 deletions
diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp index 7683b4dd15..09fa567fe5 100644 --- a/util/charset/benchmark/utf8_to_wide/main.cpp +++ b/util/charset/benchmark/utf8_to_wide/main.cpp @@ -19,33 +19,33 @@ namespace { } }; - template <size_t N> + template <size_t N> struct TRandomRuString: public TVector<char> { - inline TRandomRuString() { + inline TRandomRuString() { TVector<unsigned char> data(N * 2); - unsigned char* textEnd = data.begin(); - for (size_t i = 0; i < N; ++i) { - size_t runeLen; + unsigned char* textEnd = data.begin(); + for (size_t i = 0; i < N; ++i) { + size_t runeLen; WriteUTF8Char(RandomNumber<ui32>(0x7FF) + 1, runeLen, textEnd); - textEnd += runeLen; - } - assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd)); - } - }; - + textEnd += runeLen; + } + assign(reinterpret_cast<const char*>(data.begin()), reinterpret_cast<const char*>(textEnd)); + } + }; + using RAS1 = TRandomAsciiString<1>; using RAS10 = TRandomAsciiString<10>; using RAS50 = TRandomAsciiString<50>; using RAS1000 = TRandomAsciiString<1000>; using RAS1000000 = TRandomAsciiString<1000000>; - - using RRS1 = TRandomRuString<1>; - using RRS10 = TRandomRuString<10>; - using RRS1000 = TRandomRuString<1000>; - using RRS1000000 = TRandomRuString<1000000>; + + using RRS1 = TRandomRuString<1>; + using RRS10 = TRandomRuString<10>; + using RRS1000 = TRandomRuString<1000>; + using RRS1000000 = TRandomRuString<1000000>; } -#ifdef _sse2_ +#ifdef _sse2_ #define IS_ASCII_BENCHMARK(length) \ Y_CPU_BENCHMARK(IsStringASCII##length, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -65,7 +65,7 @@ namespace { Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(data.begin()), reinterpret_cast<const unsigned char*>(data.end()))); \ } \ } -#else //no sse +#else //no sse #define IS_ASCII_BENCHMARK(length) \ Y_CPU_BENCHMARK(IsStringASCIIScalar##length, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -79,33 +79,33 @@ namespace { Y_DO_NOT_OPTIMIZE_AWAY(::NDetail::DoIsStringASCIISlow(data.begin(), data.end())); \ } \ } -#endif - -IS_ASCII_BENCHMARK(1); -IS_ASCII_BENCHMARK(10); -IS_ASCII_BENCHMARK(50); -IS_ASCII_BENCHMARK(1000); -IS_ASCII_BENCHMARK(1000000); - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) { - const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); - const unsigned char* last = cur + len; - TCharType* p = dest; - - ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); - written = p - dest; - return cur - reinterpret_cast<const unsigned char*>(text); -} - -template <bool robust, typename TCharType> -inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) { - return UTF8ToWideImpl(text, len, dest, written); -} - +#endif + +IS_ASCII_BENCHMARK(1); +IS_ASCII_BENCHMARK(10); +IS_ASCII_BENCHMARK(50); +IS_ASCII_BENCHMARK(1000); +IS_ASCII_BENCHMARK(1000000); + +template <bool robust, typename TCharType> +inline size_t UTF8ToWideImplScalar(const char* text, size_t len, TCharType* dest, size_t& written) { + const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); + const unsigned char* last = cur + len; + TCharType* p = dest; + + ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); + written = p - dest; + return cur - reinterpret_cast<const unsigned char*>(text); +} + +template <bool robust, typename TCharType> +inline size_t UTF8ToWideImplSSE(const char* text, size_t len, TCharType* dest, size_t& written) { + return UTF8ToWideImpl(text, len, dest, written); +} + static wchar16 WBUF_UTF16[10000000]; static wchar32 WBUF_UTF32[10000000]; - + #define UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(impl, length, to) \ Y_CPU_BENCHMARK(UTF8ToWideASCII##impl##length##to, iface) { \ const auto& data = *Singleton<RAS##length>(); \ @@ -113,8 +113,8 @@ static wchar32 WBUF_UTF32[10000000]; size_t written = 0; \ Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ } \ - } - + } + #define UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(impl, length, to) \ Y_CPU_BENCHMARK(UTF8ToWideRU##impl##length##to, iface) { \ const auto& data = *Singleton<RRS##length>(); \ @@ -122,8 +122,8 @@ static wchar32 WBUF_UTF32[10000000]; size_t written = 0; \ Y_DO_NOT_OPTIMIZE_AWAY(UTF8ToWideImpl##impl<false>(data.begin(), data.size(), WBUF_##to, written)); \ } \ - } - + } + UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 10, UTF16); @@ -132,7 +132,7 @@ UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(Scalar, 1000000, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_ASCII(SSE, 1000000, UTF16); - + UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(SSE, 1, UTF16); UTF8_TO_WIDE_SCALAR_BENCHMARK_RU(Scalar, 10, UTF16); diff --git a/util/charset/generated/unidata.cpp b/util/charset/generated/unidata.cpp index 27579cab8a..6f5adbbc0a 100644 --- a/util/charset/generated/unidata.cpp +++ b/util/charset/generated/unidata.cpp @@ -7530,9 +7530,9 @@ namespace NUnicode { } } // namespace NPrivate } // namespace NUnicode -namespace NUnicode { - namespace NPrivate { - const size_t DEFAULT_KEY = 0xE001; - static_assert(NUnidataTableGenerated::Size > DEFAULT_KEY, "table size should be greater then default key"); - } -} +namespace NUnicode { + namespace NPrivate { + const size_t DEFAULT_KEY = 0xE001; + static_assert(NUnidataTableGenerated::Size > DEFAULT_KEY, "table size should be greater then default key"); + } +} diff --git a/util/charset/unicode_table.h b/util/charset/unicode_table.h index 80f88a0ba5..9e171b2583 100644 --- a/util/charset/unicode_table.h +++ b/util/charset/unicode_table.h @@ -88,10 +88,10 @@ namespace NUnicodeTable { return TImpl::Get(val); } - inline TValueRef Get(size_t key) const { - return GetImpl(key); - } - + inline TValueRef Get(size_t key) const { + return GetImpl(key); + } + public: TTable(TData data, size_t size) : Data(data) diff --git a/util/charset/unidata.h b/util/charset/unidata.h index a407d1a227..400d314186 100644 --- a/util/charset/unidata.h +++ b/util/charset/unidata.h @@ -110,13 +110,13 @@ namespace NUnicode { TCombining Combining; }; - extern const size_t DEFAULT_KEY; - + extern const size_t DEFAULT_KEY; + using TUnidataTable = NUnicodeTable::TTable<NUnicodeTable::TSubtable<NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<TProperty>>>; const TUnidataTable& UnidataTable(); inline const TProperty& CharProperty(wchar32 ch) { - return UnidataTable().Get(ch, DEFAULT_KEY); + return UnidataTable().Get(ch, DEFAULT_KEY); } inline ui32 CharInfo(wchar32 ch) { diff --git a/util/charset/utf8.h b/util/charset/utf8.h index 9b4c9a05b0..5039b46ae9 100644 --- a/util/charset/utf8.h +++ b/util/charset/utf8.h @@ -16,24 +16,24 @@ inline unsigned char UTF8LeadByteMask(size_t utf8_rune_len) { } inline size_t UTF8RuneLen(const unsigned char lead_byte) { - //b0XXXXXXX - if ((lead_byte & 0x80) == 0x00) { - return 1; - } - //b110XXXXX - if ((lead_byte & 0xe0) == 0xc0) { - return 2; - } - //b1110XXXX - if ((lead_byte & 0xf0) == 0xe0) { - return 3; - } - //b11110XXX - if ((lead_byte & 0xf8) == 0xf0) { - return 4; - } - //b10XXXXXX - return 0; + //b0XXXXXXX + if ((lead_byte & 0x80) == 0x00) { + return 1; + } + //b110XXXXX + if ((lead_byte & 0xe0) == 0xc0) { + return 2; + } + //b1110XXXX + if ((lead_byte & 0xf0) == 0xe0) { + return 3; + } + //b11110XXX + if ((lead_byte & 0xf8) == 0xf0) { + return 4; + } + //b10XXXXXX + return 0; } inline size_t UTF8RuneLenByUCS(wchar32 rune) { diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp index 8eadb3f808..9e68881cca 100644 --- a/util/charset/utf8_ut.cpp +++ b/util/charset/utf8_ut.cpp @@ -108,19 +108,19 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) { UNIT_ASSERT_EXCEPTION(UTF8ToWide(text), yexception); } } - + Y_UNIT_TEST(TestUTF8ToWideScalar) { TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt")); - + TString text = in.ReadAll(); TUtf16String wtextSSE = UTF8ToWide(text); TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size()); - const unsigned char* textBegin = reinterpret_cast<const unsigned char*>(text.c_str()); - wchar16* wtextBegin = wtextScalar.begin(); - ::NDetail::UTF8ToWideImplScalar<false>(textBegin, textBegin + text.size(), wtextBegin); - UNIT_ASSERT(wtextBegin == wtextScalar.begin() + wtextSSE.size()); - UNIT_ASSERT(textBegin == reinterpret_cast<const unsigned char*>(text.end())); - wtextScalar.remove(wtextSSE.size()); - UNIT_ASSERT(wtextScalar == wtextSSE); - } + const unsigned char* textBegin = reinterpret_cast<const unsigned char*>(text.c_str()); + wchar16* wtextBegin = wtextScalar.begin(); + ::NDetail::UTF8ToWideImplScalar<false>(textBegin, textBegin + text.size(), wtextBegin); + UNIT_ASSERT(wtextBegin == wtextScalar.begin() + wtextSSE.size()); + UNIT_ASSERT(textBegin == reinterpret_cast<const unsigned char*>(text.end())); + wtextScalar.remove(wtextSSE.size()); + UNIT_ASSERT(wtextScalar == wtextSSE); + } } diff --git a/util/charset/wide.h b/util/charset/wide.h index 8e41529842..04e6928aab 100644 --- a/util/charset/wide.h +++ b/util/charset/wide.h @@ -13,12 +13,12 @@ #include <util/system/cpu_id.h> #include <util/system/yassert.h> -#include <cstring> - -#ifdef _sse2_ +#include <cstring> + +#ifdef _sse2_ #include <emmintrin.h> -#endif - +#endif + template <class T> class TTempArray; using TCharTemp = TTempArray<wchar16>; @@ -258,26 +258,26 @@ public: } }; -namespace NDetail { - template <bool robust, typename TCharType> +namespace NDetail { + template <bool robust, typename TCharType> inline void UTF8ToWideImplScalar(const unsigned char*& cur, const unsigned char* last, TCharType*& dest) noexcept { - wchar32 rune = BROKEN_RUNE; - - while (cur != last) { - if (ReadUTF8CharAndAdvance(rune, cur, last) != RECODE_OK) { - if (robust) { - rune = BROKEN_RUNE; - ++cur; - } else { - break; - } - } - - Y_ASSERT(cur <= last); - WriteSymbol(rune, dest); - } - } - + wchar32 rune = BROKEN_RUNE; + + while (cur != last) { + if (ReadUTF8CharAndAdvance(rune, cur, last) != RECODE_OK) { + if (robust) { + rune = BROKEN_RUNE; + ++cur; + } else { + break; + } + } + + Y_ASSERT(cur <= last); + WriteSymbol(rune, dest); + } + } + template <typename TCharType> inline void UTF16ToUTF32ImplScalar(const wchar16* cur, const wchar16* last, TCharType*& dest) noexcept { wchar32 rune = BROKEN_RUNE; @@ -289,28 +289,28 @@ namespace NDetail { } } - template <class TCharType> - inline void UTF8ToWideImplSSE41(const unsigned char*& /*cur*/, const unsigned char* /*last*/, TCharType*& /*dest*/) noexcept { - } - - void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar16*& dest) noexcept; + template <class TCharType> + inline void UTF8ToWideImplSSE41(const unsigned char*& /*cur*/, const unsigned char* /*last*/, TCharType*& /*dest*/) noexcept { + } + + void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar16*& dest) noexcept; void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept; } - + //! @return len if robust and position where encoding stopped if not template <bool robust, typename TCharType> inline size_t UTF8ToWideImpl(const char* text, size_t len, TCharType* dest, size_t& written) noexcept { const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); - const unsigned char* last = cur + len; + const unsigned char* last = cur + len; TCharType* p = dest; -#ifdef _sse_ //can't check for sse4, as we build most of arcadia without sse4 support even on platforms that support it - if (cur + 16 <= last && NX86::CachedHaveSSE41()) { - ::NDetail::UTF8ToWideImplSSE41(cur, last, p); - } -#endif +#ifdef _sse_ //can't check for sse4, as we build most of arcadia without sse4 support even on platforms that support it + if (cur + 16 <= last && NX86::CachedHaveSSE41()) { + ::NDetail::UTF8ToWideImplSSE41(cur, last, p); + } +#endif - ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); + ::NDetail::UTF8ToWideImplScalar<robust>(cur, last, p); written = p - dest; return cur - reinterpret_cast<const unsigned char*>(text); } @@ -510,22 +510,22 @@ namespace NDetail { }; template <typename TChar> - inline bool DoIsStringASCIISlow(const TChar* first, const TChar* last) { + inline bool DoIsStringASCIISlow(const TChar* first, const TChar* last) { using TUnsignedChar = std::make_unsigned_t<TChar>; - Y_ASSERT(first <= last); - for (; first != last; ++first) { - if (static_cast<TUnsignedChar>(*first) > 0x7F) { - return false; - } - } - return true; - } - - template <typename TChar> + Y_ASSERT(first <= last); + for (; first != last; ++first) { + if (static_cast<TUnsignedChar>(*first) > 0x7F) { + return false; + } + } + return true; + } + + template <typename TChar> inline bool DoIsStringASCII(const TChar* first, const TChar* last) { - if (last - first < 10) { - return DoIsStringASCIISlow(first, last); - } + if (last - first < 10) { + return DoIsStringASCIISlow(first, last); + } TMachineWord allCharBits = 0; TMachineWord nonAsciiBitMask = NonASCIIMask<sizeof(TMachineWord), TChar>::Value(); @@ -557,40 +557,40 @@ namespace NDetail { return !(allCharBits & nonAsciiBitMask); } -#ifdef _sse2_ - inline bool DoIsStringASCIISSE(const unsigned char* first, const unsigned char* last) { - //scalar version for short strings - if (first + 8 > last) { - return ::NDetail::DoIsStringASCIISlow(first, last); - } - - alignas(16) unsigned char buf[16]; - - while (first + 16 <= last) { - memcpy(buf, first, 16); - __m128i chunk = _mm_load_si128(reinterpret_cast<__m128i*>(buf)); - - int asciiMask = _mm_movemask_epi8(chunk); - if (asciiMask) { +#ifdef _sse2_ + inline bool DoIsStringASCIISSE(const unsigned char* first, const unsigned char* last) { + //scalar version for short strings + if (first + 8 > last) { + return ::NDetail::DoIsStringASCIISlow(first, last); + } + + alignas(16) unsigned char buf[16]; + + while (first + 16 <= last) { + memcpy(buf, first, 16); + __m128i chunk = _mm_load_si128(reinterpret_cast<__m128i*>(buf)); + + int asciiMask = _mm_movemask_epi8(chunk); + if (asciiMask) { return false; } - first += 16; + first += 16; } - - if (first + 8 <= last) { - memcpy(buf, first, 8); - __m128i chunk = _mm_loadl_epi64(reinterpret_cast<__m128i*>(buf)); - - int asciiMask = _mm_movemask_epi8(chunk); - if (asciiMask) { - return false; - } - first += 8; - } - - return ::NDetail::DoIsStringASCIISlow(first, last); + + if (first + 8 <= last) { + memcpy(buf, first, 8); + __m128i chunk = _mm_loadl_epi64(reinterpret_cast<__m128i*>(buf)); + + int asciiMask = _mm_movemask_epi8(chunk); + if (asciiMask) { + return false; + } + first += 8; + } + + return ::NDetail::DoIsStringASCIISlow(first, last); } -#endif //_sse2_ +#endif //_sse2_ } @@ -600,17 +600,17 @@ inline bool IsStringASCII(const TChar* first, const TChar* last) { return ::NDetail::DoIsStringASCII(first, last); } -#ifdef _sse2_ -template <> -inline bool IsStringASCII<unsigned char>(const unsigned char* first, const unsigned char* last) { - return ::NDetail::DoIsStringASCIISSE(first, last); -} -template <> -inline bool IsStringASCII<char>(const char* first, const char* last) { - return ::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(first), reinterpret_cast<const unsigned char*>(last)); -} -#endif - +#ifdef _sse2_ +template <> +inline bool IsStringASCII<unsigned char>(const unsigned char* first, const unsigned char* last) { + return ::NDetail::DoIsStringASCIISSE(first, last); +} +template <> +inline bool IsStringASCII<char>(const char* first, const char* last) { + return ::NDetail::DoIsStringASCIISSE(reinterpret_cast<const unsigned char*>(first), reinterpret_cast<const unsigned char*>(last)); +} +#endif + //! copies elements from one character sequence to another using memcpy //! for compatibility only template <typename TChar> diff --git a/util/charset/wide_sse41.cpp b/util/charset/wide_sse41.cpp index 6859e9c44c..d1f2a74851 100644 --- a/util/charset/wide_sse41.cpp +++ b/util/charset/wide_sse41.cpp @@ -1,6 +1,6 @@ -#include <util/charset/wide.h> +#include <util/charset/wide.h> #include <util/system/types.h> - + #ifdef SSE41_STUB namespace NDetail { @@ -13,21 +13,21 @@ namespace NDetail { #else #include <util/system/compiler.h> - + #include <cstring> #include <emmintrin.h> #include <smmintrin.h> - -//processes to the first error, or until less then 16 bytes left -//most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html - + +//processes to the first error, or until less then 16 bytes left +//most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html + //return dstAdvance 0 in case of problems static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned char*& cur, __m128i& utf16Low, __m128i& utf16High) { unsigned char curAligned[16]; - + memcpy(curAligned, cur, sizeof(__m128i)); __m128i chunk = _mm_load_si128(reinterpret_cast<const __m128i*>(curAligned)); - + //only ascii characters - simple copy if (!_mm_movemask_epi8(chunk)) { utf16Low = _mm_unpacklo_epi8(chunk, _mm_setzero_si128()); @@ -35,68 +35,68 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch cur += 16; return 16; } - + __m128i chunkSigned = _mm_add_epi8(chunk, _mm_set1_epi8(0x80)); __m128i isAsciiMask = _mm_cmpgt_epi8(chunk, _mm_set1_epi8(0)); - + __m128i cond2 = _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunkSigned); __m128i state = _mm_set1_epi8(0x0 | (char)0x80); - + __m128i cond3 = _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunkSigned); state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2); - + int sourceAdvance; __m128i shifts; __m128i chunkLow, chunkHigh; - + if (Y_LIKELY(!_mm_movemask_epi8(cond3))) { //main case: no bloks of size 3 or 4 - + //rune len for start of multi-byte sequences (0 for b0... and b10..., 2 for b110..., etc.) __m128i count = _mm_and_si128(state, _mm_set1_epi8(0x7)); - + __m128i countSub1 = _mm_subs_epu8(count, _mm_set1_epi8(0x1)); - + shifts = countSub1; __m128i continuation1 = _mm_slli_si128(countSub1, 1); - + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 1)); shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 2)); - + __m128i counts = _mm_or_si128(count, continuation1); - + __m128i isBeginMultibyteMask = _mm_cmpgt_epi8(count, _mm_set1_epi8(0)); __m128i needNoContinuationMask = _mm_cmpeq_epi8(continuation1, _mm_set1_epi8(0)); __m128i isBeginMask = _mm_add_epi8(isBeginMultibyteMask, isAsciiMask); //each symbol should be exactly one of ascii, continuation or begin __m128i okMask = _mm_cmpeq_epi8(isBeginMask, needNoContinuationMask); - + if (_mm_movemask_epi8(okMask) != 0xFFFF) { return 0; } - + shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 4)); - + __m128i mask = _mm_and_si128(state, _mm_set1_epi8(0xf8)); shifts = _mm_add_epi8(shifts, _mm_slli_si128(shifts, 8)); - + chunk = _mm_andnot_si128(mask, chunk); // from now on, we only have usefull bits shifts = _mm_and_si128(shifts, _mm_cmplt_epi8(counts, _mm_set1_epi8(2))); // <=1 - + __m128i chunk_right = _mm_slli_si128(chunk, 1); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); - + chunkLow = _mm_blendv_epi8(chunk, _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); - + chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); - + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); chunkHigh = _mm_srli_epi32(chunkHigh, 2); - + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); @@ -166,15 +166,15 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch chunkHigh = _mm_or_si128(chunkHigh, _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)), mask3)); - + int c = _mm_extract_epi16(counts, 7); sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15 : 14; } - + shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8), _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8)); - + chunkHigh = _mm_slli_si128(chunkHigh, 1); __m128i shuf = _mm_add_epi8(shifts, _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); @@ -206,16 +206,16 @@ namespace NDetail { _mm_store_si128(reinterpret_cast<__m128i*>(destAligned), utf16Low); _mm_store_si128(reinterpret_cast<__m128i*>(destAligned) + 1, utf16High); - memcpy(dest, destAligned, sizeof(__m128i) * 2); + memcpy(dest, destAligned, sizeof(__m128i) * 2); dest += dstAdvance; } //The rest will be handled sequencially. // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence } - + void UTF8ToWideImplSSE41(const unsigned char*& cur, const unsigned char* last, wchar32*& dest) noexcept { alignas(16) wchar32 destAligned[16]; - + while (cur + 16 <= last) { __m128i utf16Low; __m128i utf16High; @@ -238,10 +238,10 @@ namespace NDetail { memcpy(dest, destAligned, sizeof(__m128i) * 4); dest += dstAdvance; - } - //The rest will be handled sequencially. + } + //The rest will be handled sequencially. // Possible improvement: go back to the vectorized processing after the error or the 4 byte sequence - } -} + } +} #endif diff --git a/util/charset/ya.make b/util/charset/ya.make index e41a9b76e7..26d38cb10b 100644 --- a/util/charset/ya.make +++ b/util/charset/ya.make @@ -21,13 +21,13 @@ JOIN_SRCS( IF (ARCH_X86_64 AND NOT DISABLE_INSTRUCTION_SETS) SRC_CPP_SSE41(wide_sse41.cpp) -ELSE() +ELSE() SRC( wide_sse41.cpp -DSSE41_STUB ) -ENDIF() - +ENDIF() + END() RECURSE_FOR_TESTS( |