aboutsummaryrefslogtreecommitdiffstats
path: root/util/charset/wide_sse41.cpp
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:15 +0300
commit72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
treeda2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /util/charset/wide_sse41.cpp
parent778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
downloadydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'util/charset/wide_sse41.cpp')
-rw-r--r--util/charset/wide_sse41.cpp48
1 files changed, 24 insertions, 24 deletions
diff --git a/util/charset/wide_sse41.cpp b/util/charset/wide_sse41.cpp
index d1f2a74851..2326424468 100644
--- a/util/charset/wide_sse41.cpp
+++ b/util/charset/wide_sse41.cpp
@@ -6,17 +6,17 @@
namespace NDetail {
void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar16*&) noexcept {
}
- void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar32*&) noexcept {
- }
+ void UTF8ToWideImplSSE41(const unsigned char*&, const unsigned char*, wchar32*&) noexcept {
+ }
}
#else
- #include <util/system/compiler.h>
+ #include <util/system/compiler.h>
- #include <cstring>
- #include <emmintrin.h>
- #include <smmintrin.h>
+ #include <cstring>
+ #include <emmintrin.h>
+ #include <smmintrin.h>
//processes to the first error, or until less than 16 bytes are left
//most code taken from https://woboq.com/blog/utf-8-processing-using-simd.html
@@ -40,10 +40,10 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
__m128i isAsciiMask = _mm_cmpgt_epi8(chunk, _mm_set1_epi8(0));
__m128i cond2 = _mm_cmplt_epi8(_mm_set1_epi8(0xc2 - 1 - 0x80), chunkSigned);
- __m128i state = _mm_set1_epi8(0x0 | (char)0x80);
+ __m128i state = _mm_set1_epi8(0x0 | (char)0x80);
__m128i cond3 = _mm_cmplt_epi8(_mm_set1_epi8(0xe0 - 1 - 0x80), chunkSigned);
- state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2);
+ state = _mm_blendv_epi8(state, _mm_set1_epi8(0x2 | (char)0xc0), cond2);
int sourceAdvance;
__m128i shifts;
@@ -85,20 +85,20 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
__m128i chunk_right = _mm_slli_si128(chunk, 1);
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1),
- _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1));
chunkLow = _mm_blendv_epi8(chunk,
- _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))),
- _mm_cmpeq_epi8(counts, _mm_set1_epi8(1)));
+ _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))),
+ _mm_cmpeq_epi8(counts, _mm_set1_epi8(1)));
chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2)));
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2),
- _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2));
chunkHigh = _mm_srli_epi32(chunkHigh, 2);
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4),
- _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4));
int c = _mm_extract_epi16(counts, 7);
sourceAdvance = !(c & 0x0200) ? 16 : 15;
@@ -107,7 +107,7 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
__m128i mask3 = _mm_slli_si128(cond3, 1);
__m128i cond4 = _mm_cmplt_epi8(_mm_set1_epi8(0xf0 - 1 - 0x80), chunkSigned);
- state = _mm_blendv_epi8(state, _mm_set1_epi8(0x3 | (char)0xe0), cond3);
+ state = _mm_blendv_epi8(state, _mm_set1_epi8(0x3 | (char)0xe0), cond3);
// 4-byte sequences are not vectorized. Fall back to scalar processing
if (Y_UNLIKELY(_mm_movemask_epi8(cond4))) {
@@ -149,31 +149,31 @@ static Y_FORCE_INLINE ui32 Unpack16BytesIntoUtf16IfNoSurrogats(const unsigned ch
__m128i chunk_right = _mm_slli_si128(chunk, 1);
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 1),
- _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 7), 1));
chunkLow = _mm_blendv_epi8(chunk,
- _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))),
- _mm_cmpeq_epi8(counts, _mm_set1_epi8(1)));
+ _mm_or_si128(chunk, _mm_and_si128(_mm_slli_epi16(chunk_right, 6), _mm_set1_epi8(0xc0))),
+ _mm_cmpeq_epi8(counts, _mm_set1_epi8(1)));
chunkHigh = _mm_and_si128(chunk, _mm_cmpeq_epi8(counts, _mm_set1_epi8(2)));
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 2),
- _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 6), 2));
chunkHigh = _mm_srli_epi32(chunkHigh, 2);
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 4),
- _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 5), 4));
chunkHigh = _mm_or_si128(chunkHigh,
- _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)),
- mask3));
+ _mm_and_si128(_mm_and_si128(_mm_slli_epi32(chunk_right, 4), _mm_set1_epi8(0xf0)),
+ mask3));
int c = _mm_extract_epi16(counts, 7);
- sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15
- : 14;
+ sourceAdvance = !(c & 0x0200) ? 16 : !(c & 0x02) ? 15
+ : 14;
}
shifts = _mm_blendv_epi8(shifts, _mm_srli_si128(shifts, 8),
- _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8));
+ _mm_srli_si128(_mm_slli_epi16(shifts, 4), 8));
chunkHigh = _mm_slli_si128(chunkHigh, 1);