diff options
author | fixthgame <[email protected]> | 2023-11-02 14:00:58 +0300 |
---|---|---|
committer | fixthgame <[email protected]> | 2023-11-02 14:37:12 +0300 |
commit | ddd7b74c610291ab16eeaab1ebca939e2b7d8c43 (patch) | |
tree | 14721ed25d5da0c2b3a5b32da64e14f1e07f2c5b | |
parent | 55f69226ed874bfe31b171e1951096e65026c4c5 (diff) |
Bug fix + CRC32+BLEND TEST
fix bugs
-rw-r--r-- | .mapping.json | 4 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt | 17 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt | 18 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt | 18 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/CMakeLists.txt | 11 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt | 17 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/exec/ya.make | 20 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/simd_avx2.h | 40 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/simd_fallback.h | 40 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/simd_sse42.h | 35 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/simd_ut.cpp | 234 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt | 1 | ||||
-rw-r--r-- | ydb/library/yql/utils/simd/ya.make | 15 |
16 files changed, 368 insertions, 105 deletions
diff --git a/.mapping.json b/.mapping.json index 30056cdd6e7..537f8d29e36 100644 --- a/.mapping.json +++ b/.mapping.json @@ -8770,7 +8770,11 @@ "ydb/library/yql/utils/log/ut/CMakeLists.linux-x86_64.txt":"", "ydb/library/yql/utils/log/ut/CMakeLists.txt":"", "ydb/library/yql/utils/log/ut/CMakeLists.windows-x86_64.txt":"", + "ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt":"", + "ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt":"", + "ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt":"", "ydb/library/yql/utils/simd/CMakeLists.txt":"", + "ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt":"", "ydb/library/yql/utils/simd/exec/CMakeLists.txt":"", "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt":"", "ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt":"", diff --git a/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt new file mode 100644 index 00000000000..7ac871c7202 --- /dev/null +++ b/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(exec) +add_subdirectory(ut) + +add_library(yql-utils-simd INTERFACE) +target_link_libraries(yql-utils-simd INTERFACE + contrib-libs-cxxsupp + yutil + cpp-digest-crc32c +) diff --git a/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt new file mode 100644 index 00000000000..09fa2b12b6b --- /dev/null +++ b/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(exec) +add_subdirectory(ut) + +add_library(yql-utils-simd INTERFACE) +target_link_libraries(yql-utils-simd INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-digest-crc32c +) diff --git a/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt new file mode 100644 index 00000000000..09fa2b12b6b --- /dev/null +++ b/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt @@ -0,0 +1,18 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(exec) +add_subdirectory(ut) + +add_library(yql-utils-simd INTERFACE) +target_link_libraries(yql-utils-simd INTERFACE + contrib-libs-linux-headers + contrib-libs-cxxsupp + yutil + cpp-digest-crc32c +) diff --git a/ydb/library/yql/utils/simd/CMakeLists.txt b/ydb/library/yql/utils/simd/CMakeLists.txt index 4118c479b56..f8b31df0c11 100644 --- a/ydb/library/yql/utils/simd/CMakeLists.txt +++ b/ydb/library/yql/utils/simd/CMakeLists.txt @@ -6,5 +6,12 @@ # original buildsystem will not be accepted. -add_subdirectory(exec) -add_subdirectory(ut) +if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-aarch64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + include(CMakeLists.darwin-x86_64.txt) +elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA) + include(CMakeLists.windows-x86_64.txt) +elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA) + include(CMakeLists.linux-x86_64.txt) +endif() diff --git a/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt new file mode 100644 index 00000000000..7ac871c7202 --- /dev/null +++ b/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt @@ -0,0 +1,17 @@ + +# This file was generated by the build system used internally in the Yandex monorepo. +# Only simple modifications are allowed (adding source-files to targets, adding simple properties +# like target_include_directories). These modifications will be ported to original +# ya.make files by maintainers. Any complex modifications which can't be ported back to the +# original buildsystem will not be accepted. + + +add_subdirectory(exec) +add_subdirectory(ut) + +add_library(yql-utils-simd INTERFACE) +target_link_libraries(yql-utils-simd INTERFACE + contrib-libs-cxxsupp + yutil + cpp-digest-crc32c +) diff --git a/ydb/library/yql/utils/simd/exec/ya.make b/ydb/library/yql/utils/simd/exec/ya.make index 91e55d4d64d..3fcf4d03e02 100644 --- a/ydb/library/yql/utils/simd/exec/ya.make +++ b/ydb/library/yql/utils/simd/exec/ya.make @@ -1 +1,19 @@ -RECURSE(stream_store)
\ No newline at end of file +EXECTEST() + +RUN( + stream_store +) + +DEPENDS( + ydb/library/yql/utils/simd/exec/stream_store +) + +PEERDIR( + ydb/library/yql/utils/simd +) + +END() + +RECURSE( + stream_store +)
\ No newline at end of file diff --git a/ydb/library/yql/utils/simd/simd_avx2.h b/ydb/library/yql/utils/simd/simd_avx2.h index f8b78dc81c1..4e03841843b 100644 --- a/ydb/library/yql/utils/simd/simd_avx2.h +++ b/ydb/library/yql/utils/simd/simd_avx2.h @@ -77,11 +77,27 @@ struct TBase8: TBase<TSimd8<T>> { template<int N> inline TSimd8<T> Blend16(const TSimd8<T> other) { - return _mm256_blend_epi16(this->Value, other->Value, N); + return _mm256_blend_epi16(this->Value, other.Value, N); } inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) { - return _mm256_blendv_epi8(this->Value, other->Value, mask); + return _mm256_blendv_epi8(this->Value, other.Value, mask.Value); + } + + static inline ui32 CRC32u8(ui32 crc, ui8 data) { + return _mm_crc32_u8(crc, data); + } + + static inline ui32 CRC32u16(ui32 crc, ui16 data) { + return _mm_crc32_u16(crc, data); + } + + static inline ui32 CRC32u32(ui32 crc, ui32 data) { + return _mm_crc32_u32(crc, data); + } + + static inline ui64 CRC32u64(ui64 crc, ui64 data) { + return _mm_crc32_u64(crc, data); } friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) { @@ -113,10 +129,18 @@ struct TSimd8<bool>: TBase8<bool> { return _mm256_set1_epi8(ui8(-(!!value))); } + inline int ToBitMask() const { + return _mm256_movemask_epi8(this->Value); + } + inline bool Any() const { return !_mm256_testz_si256(this->Value, this->Value); } + inline bool All() const { + return this->ToBitMask() == i32(0xFFFFFFFF); + } + inline TSimd8<bool> operator~() const { return *this ^ true; } @@ -148,8 +172,8 @@ struct TBase8Numeric: TBase8<T> { return _mm256_load_si256(reinterpret_cast<const __m256i *>(values)); } - inline void LoadStream(T dst[16]) const { - return _mm256_stream_load_si256(reinterpret_cast<__m256i *>(dst), this->Value); + static inline TSimd8<T> LoadStream(T dst[32]) { + return _mm256_stream_load_si256(reinterpret_cast<__m256i *>(dst)); } inline void Store(T dst[32]) const { @@ -160,7 +184,7 @@ struct TBase8Numeric: TBase8<T> { return _mm256_store_si256(reinterpret_cast<__m256i *>(dst), this->Value); } - inline void StoreStream(T dst[16]) const { + inline void StoreStream(T dst[32]) const { return _mm256_stream_si256(reinterpret_cast<__m256i *>(dst), this->Value); } @@ -170,7 +194,9 @@ struct TBase8Numeric: TBase8<T> { TSimd8<T> mask1(0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70); TSimd8<T> perm = _mm256_permute4x64_epi64(this->Value, 0x4E); - return Shuffle128(other + mask0) | perm.Shuffle128(other + mask1); + TSimd8<T> tmp = Shuffle128(other + mask0) | perm.Shuffle128(other + mask1); + TSimd8<T> mask = _mm256_cmpgt_epi8(other.Value, _mm256_set1_epi8(-1)); + return tmp & mask; } inline TSimd8<T> Shuffle128(const TSimd8<T> other) const { @@ -193,7 +219,7 @@ struct TBase8Numeric: TBase8<T> { void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") { const size_t n = sizeof(this->Value) / sizeof(TOut); TOut buf[n]; - this->Store((i8*) buf); + this->Store((T*) buf); if (n == sizeof(this->Value)) { for (size_t i = 0; i < n; i += 1) { out << int(buf[i]); diff --git a/ydb/library/yql/utils/simd/simd_fallback.h b/ydb/library/yql/utils/simd/simd_fallback.h index 7bd0009e7a2..b225fa939d3 100644 --- a/ydb/library/yql/utils/simd/simd_fallback.h +++ b/ydb/library/yql/utils/simd/simd_fallback.h @@ -3,6 +3,8 @@ #include <cstdint> #include <immintrin.h> +#include <library/cpp/digest/crc32c/crc32c.h> + #include <util/system/types.h> #include <util/stream/output.h> #include <util/generic/string.h> @@ -80,7 +82,7 @@ struct TBase8: TBase<TSimd8<T>> { size_t j = (1 << 16) - 1; for (size_t i = 0; i < 4; i += 1, j <<= 16) { if (N & (1LL << i)) { - dst |= other->Value & j; + dst |= other.Value & j; } else { dst |= this->Value & j; } @@ -92,8 +94,8 @@ struct TBase8: TBase<TSimd8<T>> { ui64 dst = 0; size_t j = (1 << 8) - 1; for (size_t i = 0; i < 8; i += 1, j <<= 8) { - if (mask.Value & (1LL << i)) { - dst |= other->Value & j; + if (mask.Value & (1uLL << (i * 8 + 7))) { + dst |= other.Value & j; } else { dst |= this->Value & j; } @@ -101,6 +103,22 @@ struct TBase8: TBase<TSimd8<T>> { return TSimd8<T>(dst); } + static inline ui32 CRC32u8(ui32 crc, ui8 data) { + return ~Crc32cExtend(~crc, (void*) &data, 1); + } + + static inline ui32 CRC32u16(ui32 crc, ui16 data) { + return ~Crc32cExtend(~crc, (void*) &data, 2); + } + + static inline ui32 CRC32u32(ui32 crc, ui32 data) { + return ~Crc32cExtend(~crc, (void*) &data, 4); + } + + static inline ui64 CRC32u64(ui64 crc, ui64 data) { + return ~Crc32cExtend(~crc, (void*) &data, 8); + } + friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) { return lhs.Value == rhs.Value; } @@ -130,10 +148,24 @@ struct TSimd8<bool>: TBase8<bool> { return ui64(-value); } + inline int ToBitMask() const { + int result = 0; + for (size_t j = 0; j < 8; j += 1) { + if ((1ULL << (j * 8 + 7)) & this->Value) { + result |= (1 << j); + } + } + return result; + } + inline bool Any() const { return Value != 0; } + inline bool All() const { + return this->Value == ui64(-1); + } + inline TSimd8<bool> operator~() const { return *this ^ true; } @@ -207,7 +239,7 @@ struct TBase8Numeric: TBase8<T> { void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") { const size_t n = sizeof(this->Value) / sizeof(TOut); TOut buf[n]; - Store((i8*) buf); + Store((T*) buf); if (n == sizeof(this->Value)) { for (size_t i = 0; i < n; i += 1) { out << int(buf[i]); diff --git a/ydb/library/yql/utils/simd/simd_sse42.h b/ydb/library/yql/utils/simd/simd_sse42.h index 835ecc13cc3..a75eadcff93 100644 --- a/ydb/library/yql/utils/simd/simd_sse42.h +++ b/ydb/library/yql/utils/simd/simd_sse42.h @@ -77,11 +77,27 @@ struct TBase8: TBase<TSimd8<T>> { template<int N> inline TSimd8<T> Blend16(const TSimd8<T> other) { - return _mm_blend_epi16(this->Value, other->Value, N); + return _mm_blend_epi16(this->Value, other.Value, N); } inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) { - return _mm_blendv_epi8(this->Value, other->Value, mask); + return _mm_blendv_epi8(this->Value, other.Value, mask.Value); + } + + static inline ui32 CRC32u8(ui32 crc, ui8 data) { + return _mm_crc32_u8(crc, data); + } + + static inline ui32 CRC32u16(ui32 crc, ui16 data) { + return _mm_crc32_u16(crc, data); + } + + static inline ui32 CRC32u32(ui32 crc, ui32 data) { + return _mm_crc32_u32(crc, data); + } + + static inline ui64 CRC32u64(ui64 crc, ui64 data) { + return _mm_crc32_u64(crc, data); } friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) { @@ -113,10 +129,18 @@ struct TSimd8<bool>: TBase8<bool> { return _mm_set1_epi8(ui8(-(!!value))); } + inline int ToBitMask() const { + return _mm_movemask_epi8(this->Value); + } + inline bool Any() const { return !_mm_testz_si128(this->Value, this->Value); } + inline bool All() const { + return this->ToBitMask() == i32(0xFFFF); + } + inline TSimd8<bool> operator~() const { return *this ^ true; } @@ -148,9 +172,8 @@ struct TBase8Numeric: TBase8<T> { return _mm_load_si128(reinterpret_cast<const __m128i *>(values)); } - - inline void LoadStream(T dst[16]) const { - return _mm_stream_load_si128(reinterpret_cast<__m128i *>(dst), this->Value); + static inline TSimd8<T> LoadStream(T dst[16]) { + return _mm_stream_load_si128(reinterpret_cast<__m128i *>(dst)); } inline void Store(T dst[16]) const { @@ -189,7 +212,7 @@ struct TBase8Numeric: TBase8<T> { void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") { const size_t n = sizeof(this->Value) / sizeof(TOut); TOut buf[n]; - Store((i8*) buf); + Store((T*) buf); if (n == sizeof(this->Value)) { for (size_t i = 0; i < n; i += 1) { out << int(buf[i]); diff --git a/ydb/library/yql/utils/simd/simd_ut.cpp b/ydb/library/yql/utils/simd/simd_ut.cpp index 104db0e58ed..9c135e6f50f 100644 --- a/ydb/library/yql/utils/simd/simd_ut.cpp +++ b/ydb/library/yql/utils/simd/simd_ut.cpp @@ -38,20 +38,20 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { } TSimd8<bool> tr(true); TSimd8<bool> fal(false); - UNIT_ASSERT_EQUAL(tr.Any(), true); - UNIT_ASSERT_EQUAL(fal.Any(), false); - UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any()); - UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any()); - UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any()); - UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any()); - UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any()); - UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any()); + UNIT_ASSERT_EQUAL(tr.All(), true); + UNIT_ASSERT_EQUAL(fal.All(), false); + UNIT_ASSERT_UNEQUAL(tr.All(), fal.All()); + UNIT_ASSERT_EQUAL(tr.All(), (tr ^ fal).All()); + UNIT_ASSERT_EQUAL(fal.All(), (tr ^ tr).All()); + UNIT_ASSERT_EQUAL(fal.All(), (tr & fal).All()); + UNIT_ASSERT_EQUAL((~tr).All(), fal.All()); + UNIT_ASSERT_EQUAL((~fal).All(), tr.All()); TSimd8<bool> bit_or = tr | fal; - UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any()); + UNIT_ASSERT_EQUAL(bit_or.All(), tr.All()); TSimd8<bool> tr_m(_mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)); - UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any()); + UNIT_ASSERT_EQUAL((tr_m == tr).All(), TSimd8<bool>(true).All()); } Y_UNIT_TEST(SimdUInt8) { if (!NX86::HaveAVX2()) { @@ -63,26 +63,26 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { i = 0; } TSimd8<ui8> a(x), b(arr), c(ui8(0)); - UNIT_ASSERT_EQUAL((a == b).Any(), true); - UNIT_ASSERT_EQUAL((b == c).Any(), true); - UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).Any(), true); + UNIT_ASSERT_EQUAL((a == b).All(), true); + UNIT_ASSERT_EQUAL((b == c).All(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).All(), true); a = TSimd8<ui8>(ui8(50)); b = TSimd8<ui8>(ui8(49)); - UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false); - UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true); - UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false); - UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true); + UNIT_ASSERT_EQUAL(c.BitsNotSet().All(), true); + UNIT_ASSERT_EQUAL(a.BitsNotSet().All(), false); + UNIT_ASSERT_EQUAL(a.AnyBitsSet().All(), true); TSimd8<ui8> a2(ui8(100)); TSimd8<ui8> a3(ui8(25)); - UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true); - UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true); - UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true); + UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).All(), true); + UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).All(), true); + UNIT_ASSERT_EQUAL((a.Shr<8>() == c).All(), true); } Y_UNIT_TEST(SimdInt8) { @@ -95,23 +95,23 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { i = 0; } TSimd8<i8> a(x), b(arr), c(i8(0)); - UNIT_ASSERT_EQUAL((a == b).Any(), true); - UNIT_ASSERT_EQUAL((b == c).Any(), true); - UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).Any(), true); + UNIT_ASSERT_EQUAL((a == b).All(), true); + UNIT_ASSERT_EQUAL((b == c).All(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).All(), true); a = TSimd8<i8>(i8(50)); b = TSimd8<i8>(i8(49)); - UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false); TSimd8<i8> a2(i8(5)); TSimd8<i8> a3(i8(25)); a = TSimd8<i8>(i8(15)); b = TSimd8<i8>(i8(10)); - UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true); - UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true); + UNIT_ASSERT_EQUAL(((a + b) == a3).All(), true); + UNIT_ASSERT_EQUAL(((a - b) == a2).All(), true); } Y_UNIT_TEST(SimdTrait) { @@ -137,7 +137,7 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); TSimd8<i8> result = tmp.Shuffle(index); UNIT_ASSERT_EQUAL((result == TSimd8<i8>(7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0, - 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0)).Any(), true); + 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0)).All(), true); } Y_UNIT_TEST(Shuffle128) { @@ -147,7 +147,7 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); TSimd8<i8> result = tmp.Shuffle128(index); UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0, - 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).Any(), true); + 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).All(), true); } Y_UNIT_TEST(ShiftBytes) { @@ -160,15 +160,46 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) { 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0); TSimd8<i8> Shift5Right( 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).Any(), true); - UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).Any(), true); + UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).All(), true); + UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).All(), true); } Y_UNIT_TEST(UnpackMask) { TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdAVX2Traits>(2, 1, false); unpackMask.Log<i8>(Cerr); UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11, - -1, 12, 13, -1, 14, 15, -1, 16, 17, -1, 18, 19, -1, 20, 21)).Any(), true); + -1, 12, 13, -1, 14, 15, -1, 16, 17, -1, 18, 19, -1, 20, 21)).All(), true); + } + + Y_UNIT_TEST(CRC) { + ui32 val = 0x454234; + UNIT_ASSERT_EQUAL(TSimd8<i8>::CRC32u32(0, val), 1867938110); + } + + Y_UNIT_TEST(Blend) { + ui8 to[32], from[32]; + for (ui8 i = 0; i < 32; i += 1) { + to[i] = i; + from[i] = 32 - i; + } + + ui16 need[16]; + for (ui8 i = 0; i < 16; i += 1) { + need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i]; + } + + const int mask = 0b10101010; + TSimd8<ui8> v1(to); + TSimd8<ui8> v2(from); + TSimd8<ui8> res((ui8*) need); + + ui16 maskBuf[16]; + for (ui8 i = 0; i < 16; i += 1) { + maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1); + } + TSimd8<ui8> blend((ui8*) maskBuf); + UNIT_ASSERT((v1.Blend16<mask>(v2) == res).All()); + UNIT_ASSERT((v1.BlendVar(v2, blend) == res).All()); } } @@ -183,20 +214,20 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) { } TSimd8<bool> tr(true); TSimd8<bool> fal(false); - UNIT_ASSERT_EQUAL(tr.Any(), true); - UNIT_ASSERT_EQUAL(fal.Any(), false); - UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any()); - UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any()); - UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any()); - UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any()); - UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any()); - UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any()); + UNIT_ASSERT_EQUAL(tr.All(), true); + UNIT_ASSERT_EQUAL(fal.All(), false); + UNIT_ASSERT_UNEQUAL(tr.All(), fal.All()); + UNIT_ASSERT_EQUAL(tr.All(), (tr ^ fal).All()); + UNIT_ASSERT_EQUAL(fal.All(), (tr ^ tr).All()); + UNIT_ASSERT_EQUAL(fal.All(), (tr & fal).All()); + UNIT_ASSERT_EQUAL((~tr).All(), fal.All()); + UNIT_ASSERT_EQUAL((~fal).All(), tr.All()); TSimd8<bool> bit_or = tr | fal; - UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any()); + UNIT_ASSERT_EQUAL(bit_or.All(), tr.All()); TSimd8<bool> tr_m(_mm_set_epi32(-1, -1, -1, -1)); - UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any()); + UNIT_ASSERT_EQUAL((tr_m == tr).All(), TSimd8<bool>(true).All()); } Y_UNIT_TEST(SimdUInt8) { if (!NX86::HaveSSE42()) { @@ -208,26 +239,26 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) { i = 0; } TSimd8<ui8> a(x), b(arr), c(ui8(0)); - UNIT_ASSERT_EQUAL((a == b).Any(), true); - UNIT_ASSERT_EQUAL((b == c).Any(), true); - UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).Any(), true); + UNIT_ASSERT_EQUAL((a == b).All(), true); + UNIT_ASSERT_EQUAL((b == c).All(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).All(), true); a = TSimd8<ui8>(ui8(50)); b = TSimd8<ui8>(ui8(49)); - UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false); - UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true); - UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false); - UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true); + UNIT_ASSERT_EQUAL(c.BitsNotSet().All(), true); + UNIT_ASSERT_EQUAL(a.BitsNotSet().All(), false); + UNIT_ASSERT_EQUAL(a.AnyBitsSet().All(), true); TSimd8<ui8> a2(ui8(100)); TSimd8<ui8> a3(ui8(25)); - UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true); - UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true); - UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true); + UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).All(), true); + UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).All(), true); + UNIT_ASSERT_EQUAL((a.Shr<8>() == c).All(), true); } Y_UNIT_TEST(SimdInt8) { @@ -240,23 +271,23 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) { i = 0; } TSimd8<i8> a(x), b(arr), c(i8(0)); - UNIT_ASSERT_EQUAL((a == b).Any(), true); - UNIT_ASSERT_EQUAL((b == c).Any(), true); - UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).Any(), true); + UNIT_ASSERT_EQUAL((a == b).All(), true); + UNIT_ASSERT_EQUAL((b == c).All(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).All(), true); a = TSimd8<i8>(i8(50)); b = TSimd8<i8>(i8(49)); - UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); - UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false); TSimd8<i8> a2(i8(5)); TSimd8<i8> a3(i8(25)); a = TSimd8<i8>(i8(15)); b = TSimd8<i8>(i8(10)); - UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true); - UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true); + UNIT_ASSERT_EQUAL(((a + b) == a3).All(), true); + UNIT_ASSERT_EQUAL(((a - b) == a2).All(), true); } Y_UNIT_TEST(SimdTrait) { @@ -282,8 +313,8 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) { TSimd8<i8> arr(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); TSimd8<i8> Shift5(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0); TSimd8<i8> Shift5Right(0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10); - UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).Any(), true); - UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).Any(), true); + UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).All(), true); + UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).All(), true); } @@ -291,15 +322,39 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) { TSimd8<i8> index(0, -1, 2, -1, 4, -1, 6, -1, 0, -1, 2, -1, 4, -1, 6, -1); TSimd8<i8> tmp(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7); TSimd8<i8> result = tmp.Shuffle(index); - UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).Any(), true); + UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).All(), true); } Y_UNIT_TEST(UnpackMask) { TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdSSE42Traits>(2, 1, false); - UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10)).Any(), true); + UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10)).All(), true); } + Y_UNIT_TEST(Blend) { + ui8 to[16], from[16]; + for (ui8 i = 0; i < 16; i += 1) { + to[i] = i; + from[i] = 16 - i; + } + ui16 need[8]; + for (ui8 i = 0; i < 8; i += 1) { + need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i]; + } + + const int mask = 0b10101010; + TSimd8<ui8> v1(to); + TSimd8<ui8> v2(from); + TSimd8<ui8> res((ui8*) need); + ui16 maskBuf[8]; + for (ui8 i = 0; i < 8; i += 1) { + maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1); + } + TSimd8<ui8> blend((ui8*) maskBuf); + + UNIT_ASSERT_EQUAL((v1.Blend16<mask>(v2) == res).All(), true); + UNIT_ASSERT_EQUAL((v1.BlendVar(v2, blend) == res).All(), true); + } } #pragma clang attribute pop @@ -339,20 +394,53 @@ Y_UNIT_TEST_SUITE(SimdFallback) { TSimd8<i8> shift5(5, 6, 7, 0, 0, 0, 0, 0); TSimd8<i8> shift5Right(0, 0, 0, 0, 0, 0, 1, 2); - UNIT_ASSERT_EQUAL((shift5Right == arr.Shuffle(mask0)).Any(), true); - UNIT_ASSERT_EQUAL((shift5 == arr.Shuffle(mask1)).Any(), true); + UNIT_ASSERT_EQUAL((shift5Right == arr.Shuffle(mask0)).All(), true); + UNIT_ASSERT_EQUAL((shift5 == arr.Shuffle(mask1)).All(), true); } Y_UNIT_TEST(Shuffle) { TSimd8<i8> index(0, -1, 2, -1, 4, -1, 6, -1); TSimd8<i8> tmp(0, 1, 2, 3, 4, 5, 6, 7); TSimd8<i8> result = tmp.Shuffle(index); - UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0)).Any(), true); + UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0)).All(), true); } Y_UNIT_TEST(UnpackMask) { TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdFallbackTraits>(2, 1, false); - UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).Any(), true); + unpackMask.Log<i8>(Cerr); + TSimd8<ui8>((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).Value).Log<ui8>(Cerr); + UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).All(), true); } + Y_UNIT_TEST(CRC) { + ui32 val = 0x454234; + + UNIT_ASSERT_EQUAL(TSimd8<i8>::CRC32u32(0, val), 1867938110); + } + + Y_UNIT_TEST(Blend) { + ui8 to[8], from[8]; + for (ui8 i = 0; i < 8; i += 1) { + to[i] = i; + from[i] = 8 - i; + } + ui16 need[8]; + for (ui8 i = 0; i < 8; i += 1) { + need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i]; + } + + const int mask = 0b10101010; + TSimd8<ui8> v1(to); + TSimd8<ui8> v2(from); + TSimd8<ui8> res((ui8*) need); + + ui16 maskBuf[4]; + for (ui8 i = 0; i < 4; i += 1) { + maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1); + } + TSimd8<ui8> blend((ui8*) maskBuf); + + UNIT_ASSERT_EQUAL((v1.Blend16<mask>(v2) == res).All(), true); + UNIT_ASSERT_EQUAL((v1.BlendVar(v2, blend) == res).All(), true); + } }
\ No newline at end of file diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt index 1f0851bb5d9..e03efb8a13f 100644 --- a/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt @@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC yutil library-cpp-cpuid_check cpp-testing-unittest_main + yql-utils-simd ) target_link_options(ydb-library-yql-utils-simd-ut PRIVATE -Wl,-platform_version,macos,11.0,11.0 diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt index f43917db826..4c8657d7a5a 100644 --- a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt @@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC contrib-libs-cxxsupp yutil cpp-testing-unittest_main + yql-utils-simd ) target_link_options(ydb-library-yql-utils-simd-ut PRIVATE -ldl diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt index 15ff8ed27ab..fc49024e695 100644 --- a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt @@ -17,6 +17,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC yutil library-cpp-cpuid_check cpp-testing-unittest_main + yql-utils-simd ) target_link_options(ydb-library-yql-utils-simd-ut PRIVATE -ldl diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt index 72fe7818c18..66276687c8a 100644 --- a/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt @@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC yutil library-cpp-cpuid_check cpp-testing-unittest_main + yql-utils-simd ) target_sources(ydb-library-yql-utils-simd-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/simd_ut.cpp diff --git a/ydb/library/yql/utils/simd/ya.make b/ydb/library/yql/utils/simd/ya.make index e6a05de3592..65a3367ca50 100644 --- a/ydb/library/yql/utils/simd/ya.make +++ b/ydb/library/yql/utils/simd/ya.make @@ -1,19 +1,10 @@ -EXECTEST() +LIBRARY() -RUN( - stream_store -) - -DEPENDS( - ydb/library/yql/utils/simd/exec/stream_store -) +PEERDIR(library/cpp/digest/crc32c) END() -RECURSE( - exec -) - RECURSE_FOR_TESTS( ut + exec )
\ No newline at end of file |