diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /util/charset/wide_sse41.cpp | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset/wide_sse41.cpp')
-rw-r--r-- | util/charset/wide_sse41.cpp | 48 |
1 files changed, 24 insertions, 24 deletions
diff --git a/util/charset/wide_sse41.cpp b/util/charset/wide_sse41.cpp index d1f2a74851..2326424468 100644 --- a/util/charset/wide_sse41.cpp +++ b/util/charset/wide_sse41.cpp @@ -6,17 +6,17 @@ namespace NDetail { void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar16*&) noexcept { } - void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar32*&) noexcept { - } + void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar32*&) noexcept { + } } #else - #include <util/system/compiler.h> + #include <util/system/compiler.h> - #include <cstring> - #include <emmintrin.h> - #include <smmintrin.h> + #include <cstring> + #include <emmintrin.h> + #include <smmintrin.h> //processes to the first error, or until less then 16 bytes left //most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html @@ -40,10 +40,10 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch __m128i isAsciiMask = _mm_cmpgt_epi8(chunk, _mm_set1_epi8(0)); __m128i cond2 = _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunkSigned); - __m128i state = _mm_set1_epi8(0x0 | (char)0x80); + __m128i state = _mm_set1_epi8(0x0 | (char)0x80); __m128i cond3 = _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunkSigned); - state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2); + state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2); int sourceAdvance; __m128i shifts; @@ -85,20 +85,20 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch __m128i chunk_right = _mm_slli_si128(chunk, 1); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), - _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); + _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); chunkLow = _mm_blendv_epi8(chunk, - _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), - _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); + _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), + _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), - _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); + _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); chunkHigh = _mm_srli_epi32(chunkHigh, 2); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), - _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); + _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); int c = _mm_extract_epi16(counts, 7); sourceAdvance = !(c & 0x0200) ? 16 : 15; @@ -107,7 +107,7 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch __m128i mask3 = _mm_slli_si128(cond3, 1); __m128i cond4 = _mm_cmplt_epi8(_mm_set1_epi8(0xf0 - 1 - 0x80), chunkSigned); - state = _mm_blendv_epi8(state, _mm_set1_epi8(0x3 | (char)0xe0), cond3); + state = _mm_blendv_epi8(state, _mm_set1_epi8(0x3 | (char)0xe0), cond3); // 4 bytes sequences are not vectorize. Fall back to the scalar processing if (Y_UNLIKELY(_mm_movemask_epi8(cond4))) { @@ -149,31 +149,31 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch __m128i chunk_right = _mm_slli_si128(chunk, 1); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1), - _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); + _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1)); chunkLow = _mm_blendv_epi8(chunk, - _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), - _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); + _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))), + _mm_cmpeq_epi8(counts, _mm_set1_epi8(1))); chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2))); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2), - _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); + _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2)); chunkHigh = _mm_srli_epi32(chunkHigh, 2); shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4), - _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); + _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4)); chunkHigh = _mm_or_si128(chunkHigh, - _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)), - mask3)); + _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)), + mask3)); int c = _mm_extract_epi16(counts, 7); - sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15 - : 14; + sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15 + : 14; } shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8), - _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8)); + _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8)); chunkHigh = _mm_slli_si128(chunkHigh, 1); |