summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfixthgame <[email protected]>2023-11-02 14:00:58 +0300
committerfixthgame <[email protected]>2023-11-02 14:37:12 +0300
commitddd7b74c610291ab16eeaab1ebca939e2b7d8c43 (patch)
tree14721ed25d5da0c2b3a5b32da64e14f1e07f2c5b
parent55f69226ed874bfe31b171e1951096e65026c4c5 (diff)
Bug fix + CRC32+BLEND TEST
fix bugs
-rw-r--r--.mapping.json4
-rw-r--r--ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt17
-rw-r--r--ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt18
-rw-r--r--ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt18
-rw-r--r--ydb/library/yql/utils/simd/CMakeLists.txt11
-rw-r--r--ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt17
-rw-r--r--ydb/library/yql/utils/simd/exec/ya.make20
-rw-r--r--ydb/library/yql/utils/simd/simd_avx2.h40
-rw-r--r--ydb/library/yql/utils/simd/simd_fallback.h40
-rw-r--r--ydb/library/yql/utils/simd/simd_sse42.h35
-rw-r--r--ydb/library/yql/utils/simd/simd_ut.cpp234
-rw-r--r--ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/ya.make15
16 files changed, 368 insertions, 105 deletions
diff --git a/.mapping.json b/.mapping.json
index 30056cdd6e7..537f8d29e36 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -8770,7 +8770,11 @@
"ydb/library/yql/utils/log/ut/CMakeLists.linux-x86_64.txt":"",
"ydb/library/yql/utils/log/ut/CMakeLists.txt":"",
"ydb/library/yql/utils/log/ut/CMakeLists.windows-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt":"",
+ "ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt":"",
"ydb/library/yql/utils/simd/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/utils/simd/exec/CMakeLists.txt":"",
"ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt":"",
"ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt":"",
diff --git a/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 00000000000..7ac871c7202
--- /dev/null
+++ b/ydb/library/yql/utils/simd/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(exec)
+add_subdirectory(ut)
+
+add_library(yql-utils-simd INTERFACE)
+target_link_libraries(yql-utils-simd INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ cpp-digest-crc32c
+)
diff --git a/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt
new file mode 100644
index 00000000000..09fa2b12b6b
--- /dev/null
+++ b/ydb/library/yql/utils/simd/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(exec)
+add_subdirectory(ut)
+
+add_library(yql-utils-simd INTERFACE)
+target_link_libraries(yql-utils-simd INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-digest-crc32c
+)
diff --git a/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt
new file mode 100644
index 00000000000..09fa2b12b6b
--- /dev/null
+++ b/ydb/library/yql/utils/simd/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,18 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(exec)
+add_subdirectory(ut)
+
+add_library(yql-utils-simd INTERFACE)
+target_link_libraries(yql-utils-simd INTERFACE
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ cpp-digest-crc32c
+)
diff --git a/ydb/library/yql/utils/simd/CMakeLists.txt b/ydb/library/yql/utils/simd/CMakeLists.txt
index 4118c479b56..f8b31df0c11 100644
--- a/ydb/library/yql/utils/simd/CMakeLists.txt
+++ b/ydb/library/yql/utils/simd/CMakeLists.txt
@@ -6,5 +6,12 @@
# original buildsystem will not be accepted.
-add_subdirectory(exec)
-add_subdirectory(ut)
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt
new file mode 100644
index 00000000000..7ac871c7202
--- /dev/null
+++ b/ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+add_subdirectory(exec)
+add_subdirectory(ut)
+
+add_library(yql-utils-simd INTERFACE)
+target_link_libraries(yql-utils-simd INTERFACE
+ contrib-libs-cxxsupp
+ yutil
+ cpp-digest-crc32c
+)
diff --git a/ydb/library/yql/utils/simd/exec/ya.make b/ydb/library/yql/utils/simd/exec/ya.make
index 91e55d4d64d..3fcf4d03e02 100644
--- a/ydb/library/yql/utils/simd/exec/ya.make
+++ b/ydb/library/yql/utils/simd/exec/ya.make
@@ -1 +1,19 @@
-RECURSE(stream_store) \ No newline at end of file
+EXECTEST()
+
+RUN(
+ stream_store
+)
+
+DEPENDS(
+ ydb/library/yql/utils/simd/exec/stream_store
+)
+
+PEERDIR(
+ ydb/library/yql/utils/simd
+)
+
+END()
+
+RECURSE(
+ stream_store
+) \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/simd_avx2.h b/ydb/library/yql/utils/simd/simd_avx2.h
index f8b78dc81c1..4e03841843b 100644
--- a/ydb/library/yql/utils/simd/simd_avx2.h
+++ b/ydb/library/yql/utils/simd/simd_avx2.h
@@ -77,11 +77,27 @@ struct TBase8: TBase<TSimd8<T>> {
template<int N>
inline TSimd8<T> Blend16(const TSimd8<T> other) {
- return _mm256_blend_epi16(this->Value, other->Value, N);
+ return _mm256_blend_epi16(this->Value, other.Value, N);
}
inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) {
- return _mm256_blendv_epi8(this->Value, other->Value, mask);
+ return _mm256_blendv_epi8(this->Value, other.Value, mask.Value);
+ }
+
+ static inline ui32 CRC32u8(ui32 crc, ui8 data) {
+ return _mm_crc32_u8(crc, data);
+ }
+
+ static inline ui32 CRC32u16(ui32 crc, ui16 data) {
+ return _mm_crc32_u16(crc, data);
+ }
+
+ static inline ui32 CRC32u32(ui32 crc, ui32 data) {
+ return _mm_crc32_u32(crc, data);
+ }
+
+ static inline ui64 CRC32u64(ui64 crc, ui64 data) {
+ return _mm_crc32_u64(crc, data);
}
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
@@ -113,10 +129,18 @@ struct TSimd8<bool>: TBase8<bool> {
return _mm256_set1_epi8(ui8(-(!!value)));
}
+ inline int ToBitMask() const {
+ return _mm256_movemask_epi8(this->Value);
+ }
+
inline bool Any() const {
return !_mm256_testz_si256(this->Value, this->Value);
}
+ inline bool All() const {
+ return this->ToBitMask() == i32(0xFFFFFFFF);
+ }
+
inline TSimd8<bool> operator~() const {
return *this ^ true;
}
@@ -148,8 +172,8 @@ struct TBase8Numeric: TBase8<T> {
return _mm256_load_si256(reinterpret_cast<const __m256i *>(values));
}
- inline void LoadStream(T dst[16]) const {
- return _mm256_stream_load_si256(reinterpret_cast<__m256i *>(dst), this->Value);
+ static inline TSimd8<T> LoadStream(T dst[32]) {
+ return _mm256_stream_load_si256(reinterpret_cast<__m256i *>(dst));
}
inline void Store(T dst[32]) const {
@@ -160,7 +184,7 @@ struct TBase8Numeric: TBase8<T> {
return _mm256_store_si256(reinterpret_cast<__m256i *>(dst), this->Value);
}
- inline void StoreStream(T dst[16]) const {
+ inline void StoreStream(T dst[32]) const {
return _mm256_stream_si256(reinterpret_cast<__m256i *>(dst), this->Value);
}
@@ -170,7 +194,9 @@ struct TBase8Numeric: TBase8<T> {
TSimd8<T> mask1(0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70, 0x70);
TSimd8<T> perm = _mm256_permute4x64_epi64(this->Value, 0x4E);
- return Shuffle128(other + mask0) | perm.Shuffle128(other + mask1);
+ TSimd8<T> tmp = Shuffle128(other + mask0) | perm.Shuffle128(other + mask1);
+ TSimd8<T> mask = _mm256_cmpgt_epi8(other.Value, _mm256_set1_epi8(-1));
+ return tmp & mask;
}
inline TSimd8<T> Shuffle128(const TSimd8<T> other) const {
@@ -193,7 +219,7 @@ struct TBase8Numeric: TBase8<T> {
void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") {
const size_t n = sizeof(this->Value) / sizeof(TOut);
TOut buf[n];
- this->Store((i8*) buf);
+ this->Store((T*) buf);
if (n == sizeof(this->Value)) {
for (size_t i = 0; i < n; i += 1) {
out << int(buf[i]);
diff --git a/ydb/library/yql/utils/simd/simd_fallback.h b/ydb/library/yql/utils/simd/simd_fallback.h
index 7bd0009e7a2..b225fa939d3 100644
--- a/ydb/library/yql/utils/simd/simd_fallback.h
+++ b/ydb/library/yql/utils/simd/simd_fallback.h
@@ -3,6 +3,8 @@
#include <cstdint>
#include <immintrin.h>
+#include <library/cpp/digest/crc32c/crc32c.h>
+
#include <util/system/types.h>
#include <util/stream/output.h>
#include <util/generic/string.h>
@@ -80,7 +82,7 @@ struct TBase8: TBase<TSimd8<T>> {
size_t j = (1 << 16) - 1;
for (size_t i = 0; i < 4; i += 1, j <<= 16) {
if (N & (1LL << i)) {
- dst |= other->Value & j;
+ dst |= other.Value & j;
} else {
dst |= this->Value & j;
}
@@ -92,8 +94,8 @@ struct TBase8: TBase<TSimd8<T>> {
ui64 dst = 0;
size_t j = (1 << 8) - 1;
for (size_t i = 0; i < 8; i += 1, j <<= 8) {
- if (mask.Value & (1LL << i)) {
- dst |= other->Value & j;
+ if (mask.Value & (1uLL << (i * 8 + 7))) {
+ dst |= other.Value & j;
} else {
dst |= this->Value & j;
}
@@ -101,6 +103,22 @@ struct TBase8: TBase<TSimd8<T>> {
return TSimd8<T>(dst);
}
+ static inline ui32 CRC32u8(ui32 crc, ui8 data) {
+ return ~Crc32cExtend(~crc, (void*) &data, 1);
+ }
+
+ static inline ui32 CRC32u16(ui32 crc, ui16 data) {
+ return ~Crc32cExtend(~crc, (void*) &data, 2);
+ }
+
+ static inline ui32 CRC32u32(ui32 crc, ui32 data) {
+ return ~Crc32cExtend(~crc, (void*) &data, 4);
+ }
+
+ static inline ui64 CRC32u64(ui64 crc, ui64 data) {
+ return ~Crc32cExtend(~crc, (void*) &data, 8);
+ }
+
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
return lhs.Value == rhs.Value;
}
@@ -130,10 +148,24 @@ struct TSimd8<bool>: TBase8<bool> {
return ui64(-value);
}
+ inline int ToBitMask() const {
+ int result = 0;
+ for (size_t j = 0; j < 8; j += 1) {
+ if ((1ULL << (j * 8 + 7)) & this->Value) {
+ result |= (1 << j);
+ }
+ }
+ return result;
+ }
+
inline bool Any() const {
return Value != 0;
}
+ inline bool All() const {
+ return this->Value == ui64(-1);
+ }
+
inline TSimd8<bool> operator~() const {
return *this ^ true;
}
@@ -207,7 +239,7 @@ struct TBase8Numeric: TBase8<T> {
void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") {
const size_t n = sizeof(this->Value) / sizeof(TOut);
TOut buf[n];
- Store((i8*) buf);
+ Store((T*) buf);
if (n == sizeof(this->Value)) {
for (size_t i = 0; i < n; i += 1) {
out << int(buf[i]);
diff --git a/ydb/library/yql/utils/simd/simd_sse42.h b/ydb/library/yql/utils/simd/simd_sse42.h
index 835ecc13cc3..a75eadcff93 100644
--- a/ydb/library/yql/utils/simd/simd_sse42.h
+++ b/ydb/library/yql/utils/simd/simd_sse42.h
@@ -77,11 +77,27 @@ struct TBase8: TBase<TSimd8<T>> {
template<int N>
inline TSimd8<T> Blend16(const TSimd8<T> other) {
- return _mm_blend_epi16(this->Value, other->Value, N);
+ return _mm_blend_epi16(this->Value, other.Value, N);
}
inline TSimd8<T> BlendVar(const TSimd8<T> other, const TSimd8<T> mask) {
- return _mm_blendv_epi8(this->Value, other->Value, mask);
+ return _mm_blendv_epi8(this->Value, other.Value, mask.Value);
+ }
+
+ static inline ui32 CRC32u8(ui32 crc, ui8 data) {
+ return _mm_crc32_u8(crc, data);
+ }
+
+ static inline ui32 CRC32u16(ui32 crc, ui16 data) {
+ return _mm_crc32_u16(crc, data);
+ }
+
+ static inline ui32 CRC32u32(ui32 crc, ui32 data) {
+ return _mm_crc32_u32(crc, data);
+ }
+
+ static inline ui64 CRC32u64(ui64 crc, ui64 data) {
+ return _mm_crc32_u64(crc, data);
}
friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) {
@@ -113,10 +129,18 @@ struct TSimd8<bool>: TBase8<bool> {
return _mm_set1_epi8(ui8(-(!!value)));
}
+ inline int ToBitMask() const {
+ return _mm_movemask_epi8(this->Value);
+ }
+
inline bool Any() const {
return !_mm_testz_si128(this->Value, this->Value);
}
+ inline bool All() const {
+ return this->ToBitMask() == i32(0xFFFF);
+ }
+
inline TSimd8<bool> operator~() const {
return *this ^ true;
}
@@ -148,9 +172,8 @@ struct TBase8Numeric: TBase8<T> {
return _mm_load_si128(reinterpret_cast<const __m128i *>(values));
}
-
- inline void LoadStream(T dst[16]) const {
- return _mm_stream_load_si128(reinterpret_cast<__m128i *>(dst), this->Value);
+ static inline TSimd8<T> LoadStream(T dst[16]) {
+ return _mm_stream_load_si128(reinterpret_cast<__m128i *>(dst));
}
inline void Store(T dst[16]) const {
@@ -189,7 +212,7 @@ struct TBase8Numeric: TBase8<T> {
void Log(IOutputStream& out, TString delimeter = " ", TString end = "\n") {
const size_t n = sizeof(this->Value) / sizeof(TOut);
TOut buf[n];
- Store((i8*) buf);
+ Store((T*) buf);
if (n == sizeof(this->Value)) {
for (size_t i = 0; i < n; i += 1) {
out << int(buf[i]);
diff --git a/ydb/library/yql/utils/simd/simd_ut.cpp b/ydb/library/yql/utils/simd/simd_ut.cpp
index 104db0e58ed..9c135e6f50f 100644
--- a/ydb/library/yql/utils/simd/simd_ut.cpp
+++ b/ydb/library/yql/utils/simd/simd_ut.cpp
@@ -38,20 +38,20 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
}
TSimd8<bool> tr(true);
TSimd8<bool> fal(false);
- UNIT_ASSERT_EQUAL(tr.Any(), true);
- UNIT_ASSERT_EQUAL(fal.Any(), false);
- UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any());
- UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any());
- UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any());
- UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any());
- UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any());
- UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any());
+ UNIT_ASSERT_EQUAL(tr.All(), true);
+ UNIT_ASSERT_EQUAL(fal.All(), false);
+ UNIT_ASSERT_UNEQUAL(tr.All(), fal.All());
+ UNIT_ASSERT_EQUAL(tr.All(), (tr ^ fal).All());
+ UNIT_ASSERT_EQUAL(fal.All(), (tr ^ tr).All());
+ UNIT_ASSERT_EQUAL(fal.All(), (tr & fal).All());
+ UNIT_ASSERT_EQUAL((~tr).All(), fal.All());
+ UNIT_ASSERT_EQUAL((~fal).All(), tr.All());
TSimd8<bool> bit_or = tr | fal;
- UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any());
+ UNIT_ASSERT_EQUAL(bit_or.All(), tr.All());
TSimd8<bool> tr_m(_mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1));
- UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any());
+ UNIT_ASSERT_EQUAL((tr_m == tr).All(), TSimd8<bool>(true).All());
}
Y_UNIT_TEST(SimdUInt8) {
if (!NX86::HaveAVX2()) {
@@ -63,26 +63,26 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
i = 0;
}
TSimd8<ui8> a(x), b(arr), c(ui8(0));
- UNIT_ASSERT_EQUAL((a == b).Any(), true);
- UNIT_ASSERT_EQUAL((b == c).Any(), true);
- UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).Any(), true);
+ UNIT_ASSERT_EQUAL((a == b).All(), true);
+ UNIT_ASSERT_EQUAL((b == c).All(), true);
+ UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).All(), true);
a = TSimd8<ui8>(ui8(50));
b = TSimd8<ui8>(ui8(49));
- UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false);
+ UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false);
- UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true);
- UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false);
- UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true);
+ UNIT_ASSERT_EQUAL(c.BitsNotSet().All(), true);
+ UNIT_ASSERT_EQUAL(a.BitsNotSet().All(), false);
+ UNIT_ASSERT_EQUAL(a.AnyBitsSet().All(), true);
TSimd8<ui8> a2(ui8(100));
TSimd8<ui8> a3(ui8(25));
- UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true);
- UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true);
- UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true);
+ UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).All(), true);
+ UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).All(), true);
+ UNIT_ASSERT_EQUAL((a.Shr<8>() == c).All(), true);
}
Y_UNIT_TEST(SimdInt8) {
@@ -95,23 +95,23 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
i = 0;
}
TSimd8<i8> a(x), b(arr), c(i8(0));
- UNIT_ASSERT_EQUAL((a == b).Any(), true);
- UNIT_ASSERT_EQUAL((b == c).Any(), true);
- UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).Any(), true);
+ UNIT_ASSERT_EQUAL((a == b).All(), true);
+ UNIT_ASSERT_EQUAL((b == c).All(), true);
+ UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).All(), true);
a = TSimd8<i8>(i8(50));
b = TSimd8<i8>(i8(49));
- UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false);
+ UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false);
TSimd8<i8> a2(i8(5));
TSimd8<i8> a3(i8(25));
a = TSimd8<i8>(i8(15));
b = TSimd8<i8>(i8(10));
- UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true);
- UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true);
+ UNIT_ASSERT_EQUAL(((a + b) == a3).All(), true);
+ UNIT_ASSERT_EQUAL(((a - b) == a2).All(), true);
}
Y_UNIT_TEST(SimdTrait) {
@@ -137,7 +137,7 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7);
TSimd8<i8> result = tmp.Shuffle(index);
UNIT_ASSERT_EQUAL((result == TSimd8<i8>(7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0,
- 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0)).Any(), true);
+ 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0)).All(), true);
}
Y_UNIT_TEST(Shuffle128) {
@@ -147,7 +147,7 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7);
TSimd8<i8> result = tmp.Shuffle128(index);
UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0,
- 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).Any(), true);
+ 0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).All(), true);
}
Y_UNIT_TEST(ShiftBytes) {
@@ -160,15 +160,46 @@ Y_UNIT_TEST_SUITE(TSimdAVX2) {
5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0);
TSimd8<i8> Shift5Right( 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
- UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).Any(), true);
- UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).Any(), true);
+ UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).All(), true);
+ UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).All(), true);
}
Y_UNIT_TEST(UnpackMask) {
TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdAVX2Traits>(2, 1, false);
unpackMask.Log<i8>(Cerr);
UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10, 11,
- -1, 12, 13, -1, 14, 15, -1, 16, 17, -1, 18, 19, -1, 20, 21)).Any(), true);
+ -1, 12, 13, -1, 14, 15, -1, 16, 17, -1, 18, 19, -1, 20, 21)).All(), true);
+ }
+
+ Y_UNIT_TEST(CRC) {
+ ui32 val = 0x454234;
+ UNIT_ASSERT_EQUAL(TSimd8<i8>::CRC32u32(0, val), 1867938110);
+ }
+
+ Y_UNIT_TEST(Blend) {
+ ui8 to[32], from[32];
+ for (ui8 i = 0; i < 32; i += 1) {
+ to[i] = i;
+ from[i] = 32 - i;
+ }
+
+ ui16 need[16];
+ for (ui8 i = 0; i < 16; i += 1) {
+ need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i];
+ }
+
+ const int mask = 0b10101010;
+ TSimd8<ui8> v1(to);
+ TSimd8<ui8> v2(from);
+ TSimd8<ui8> res((ui8*) need);
+
+ ui16 maskBuf[16];
+ for (ui8 i = 0; i < 16; i += 1) {
+ maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1);
+ }
+ TSimd8<ui8> blend((ui8*) maskBuf);
+ UNIT_ASSERT((v1.Blend16<mask>(v2) == res).All());
+ UNIT_ASSERT((v1.BlendVar(v2, blend) == res).All());
}
}
@@ -183,20 +214,20 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) {
}
TSimd8<bool> tr(true);
TSimd8<bool> fal(false);
- UNIT_ASSERT_EQUAL(tr.Any(), true);
- UNIT_ASSERT_EQUAL(fal.Any(), false);
- UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any());
- UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any());
- UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any());
- UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any());
- UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any());
- UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any());
+ UNIT_ASSERT_EQUAL(tr.All(), true);
+ UNIT_ASSERT_EQUAL(fal.All(), false);
+ UNIT_ASSERT_UNEQUAL(tr.All(), fal.All());
+ UNIT_ASSERT_EQUAL(tr.All(), (tr ^ fal).All());
+ UNIT_ASSERT_EQUAL(fal.All(), (tr ^ tr).All());
+ UNIT_ASSERT_EQUAL(fal.All(), (tr & fal).All());
+ UNIT_ASSERT_EQUAL((~tr).All(), fal.All());
+ UNIT_ASSERT_EQUAL((~fal).All(), tr.All());
TSimd8<bool> bit_or = tr | fal;
- UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any());
+ UNIT_ASSERT_EQUAL(bit_or.All(), tr.All());
TSimd8<bool> tr_m(_mm_set_epi32(-1, -1, -1, -1));
- UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any());
+ UNIT_ASSERT_EQUAL((tr_m == tr).All(), TSimd8<bool>(true).All());
}
Y_UNIT_TEST(SimdUInt8) {
if (!NX86::HaveSSE42()) {
@@ -208,26 +239,26 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) {
i = 0;
}
TSimd8<ui8> a(x), b(arr), c(ui8(0));
- UNIT_ASSERT_EQUAL((a == b).Any(), true);
- UNIT_ASSERT_EQUAL((b == c).Any(), true);
- UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).Any(), true);
+ UNIT_ASSERT_EQUAL((a == b).All(), true);
+ UNIT_ASSERT_EQUAL((b == c).All(), true);
+ UNIT_ASSERT_EQUAL((c == TSimd8<ui8>::Zero()).All(), true);
a = TSimd8<ui8>(ui8(50));
b = TSimd8<ui8>(ui8(49));
- UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false);
+ UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false);
- UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true);
- UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false);
- UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true);
+ UNIT_ASSERT_EQUAL(c.BitsNotSet().All(), true);
+ UNIT_ASSERT_EQUAL(a.BitsNotSet().All(), false);
+ UNIT_ASSERT_EQUAL(a.AnyBitsSet().All(), true);
TSimd8<ui8> a2(ui8(100));
TSimd8<ui8> a3(ui8(25));
- UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true);
- UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true);
- UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true);
+ UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).All(), true);
+ UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).All(), true);
+ UNIT_ASSERT_EQUAL((a.Shr<8>() == c).All(), true);
}
Y_UNIT_TEST(SimdInt8) {
@@ -240,23 +271,23 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) {
i = 0;
}
TSimd8<i8> a(x), b(arr), c(i8(0));
- UNIT_ASSERT_EQUAL((a == b).Any(), true);
- UNIT_ASSERT_EQUAL((b == c).Any(), true);
- UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).Any(), true);
+ UNIT_ASSERT_EQUAL((a == b).All(), true);
+ UNIT_ASSERT_EQUAL((b == c).All(), true);
+ UNIT_ASSERT_EQUAL((c == TSimd8<i8>::Zero()).All(), true);
a = TSimd8<i8>(i8(50));
b = TSimd8<i8>(i8(49));
- UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true);
- UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false);
+ UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == b).All(), true);
+ UNIT_ASSERT_EQUAL((a.MinValue(b) == a).All(), false);
TSimd8<i8> a2(i8(5));
TSimd8<i8> a3(i8(25));
a = TSimd8<i8>(i8(15));
b = TSimd8<i8>(i8(10));
- UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true);
- UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true);
+ UNIT_ASSERT_EQUAL(((a + b) == a3).All(), true);
+ UNIT_ASSERT_EQUAL(((a - b) == a2).All(), true);
}
Y_UNIT_TEST(SimdTrait) {
@@ -282,8 +313,8 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) {
TSimd8<i8> arr(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
TSimd8<i8> Shift5(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0);
TSimd8<i8> Shift5Right(0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
- UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).Any(), true);
- UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).Any(), true);
+ UNIT_ASSERT_EQUAL((Shift5Right == arr.Shuffle(mask0)).All(), true);
+ UNIT_ASSERT_EQUAL((Shift5 == arr.Shuffle(mask1)).All(), true);
}
@@ -291,15 +322,39 @@ Y_UNIT_TEST_SUITE(TSimdSSE42) {
TSimd8<i8> index(0, -1, 2, -1, 4, -1, 6, -1, 0, -1, 2, -1, 4, -1, 6, -1);
TSimd8<i8> tmp(0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7);
TSimd8<i8> result = tmp.Shuffle(index);
- UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).Any(), true);
+ UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0, 0, 0, 2, 0, 4, 0, 6, 0)).All(), true);
}
Y_UNIT_TEST(UnpackMask) {
TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdSSE42Traits>(2, 1, false);
- UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10)).Any(), true);
+ UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5, -1, 6, 7, -1, 8, 9, -1, 10)).All(), true);
}
+ Y_UNIT_TEST(Blend) {
+ ui8 to[16], from[16];
+ for (ui8 i = 0; i < 16; i += 1) {
+ to[i] = i;
+ from[i] = 16 - i;
+ }
+ ui16 need[8];
+ for (ui8 i = 0; i < 8; i += 1) {
+ need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i];
+ }
+
+ const int mask = 0b10101010;
+ TSimd8<ui8> v1(to);
+ TSimd8<ui8> v2(from);
+ TSimd8<ui8> res((ui8*) need);
+ ui16 maskBuf[8];
+ for (ui8 i = 0; i < 8; i += 1) {
+ maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1);
+ }
+ TSimd8<ui8> blend((ui8*) maskBuf);
+
+ UNIT_ASSERT_EQUAL((v1.Blend16<mask>(v2) == res).All(), true);
+ UNIT_ASSERT_EQUAL((v1.BlendVar(v2, blend) == res).All(), true);
+ }
}
#pragma clang attribute pop
@@ -339,20 +394,53 @@ Y_UNIT_TEST_SUITE(SimdFallback) {
TSimd8<i8> shift5(5, 6, 7, 0, 0, 0, 0, 0);
TSimd8<i8> shift5Right(0, 0, 0, 0, 0, 0, 1, 2);
- UNIT_ASSERT_EQUAL((shift5Right == arr.Shuffle(mask0)).Any(), true);
- UNIT_ASSERT_EQUAL((shift5 == arr.Shuffle(mask1)).Any(), true);
+ UNIT_ASSERT_EQUAL((shift5Right == arr.Shuffle(mask0)).All(), true);
+ UNIT_ASSERT_EQUAL((shift5 == arr.Shuffle(mask1)).All(), true);
}
Y_UNIT_TEST(Shuffle) {
TSimd8<i8> index(0, -1, 2, -1, 4, -1, 6, -1);
TSimd8<i8> tmp(0, 1, 2, 3, 4, 5, 6, 7);
TSimd8<i8> result = tmp.Shuffle(index);
- UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0)).Any(), true);
+ UNIT_ASSERT_EQUAL((result == TSimd8<i8>(0, 0, 2, 0, 4, 0, 6, 0)).All(), true);
}
Y_UNIT_TEST(UnpackMask) {
TSimd8<i8> unpackMask = NSimd::CreateUnpackMask<NSimd::TSimdFallbackTraits>(2, 1, false);
- UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).Any(), true);
+ unpackMask.Log<i8>(Cerr);
+ TSimd8<ui8>((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).Value).Log<ui8>(Cerr);
+ UNIT_ASSERT_EQUAL((unpackMask == TSimd8<i8>(0, 1, -1, 2, 3, -1, 4, 5)).All(), true);
}
+ Y_UNIT_TEST(CRC) {
+ ui32 val = 0x454234;
+
+ UNIT_ASSERT_EQUAL(TSimd8<i8>::CRC32u32(0, val), 1867938110);
+ }
+
+ Y_UNIT_TEST(Blend) {
+ ui8 to[8], from[8];
+ for (ui8 i = 0; i < 8; i += 1) {
+ to[i] = i;
+ from[i] = 8 - i;
+ }
+ ui16 need[8];
+ for (ui8 i = 0; i < 8; i += 1) {
+ need[i] = i % 2 == 0 ? ((ui16*)to)[i] : ((ui16*)from)[i];
+ }
+
+ const int mask = 0b10101010;
+ TSimd8<ui8> v1(to);
+ TSimd8<ui8> v2(from);
+ TSimd8<ui8> res((ui8*) need);
+
+ ui16 maskBuf[4];
+ for (ui8 i = 0; i < 4; i += 1) {
+ maskBuf[i] = i % 2 == 0 ? 0 : ui16(-1);
+ }
+ TSimd8<ui8> blend((ui8*) maskBuf);
+
+ UNIT_ASSERT_EQUAL((v1.Blend16<mask>(v2) == res).All(), true);
+ UNIT_ASSERT_EQUAL((v1.BlendVar(v2, blend) == res).All(), true);
+ }
} \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt
index 1f0851bb5d9..e03efb8a13f 100644
--- a/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/utils/simd/ut/CMakeLists.darwin-x86_64.txt
@@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC
yutil
library-cpp-cpuid_check
cpp-testing-unittest_main
+ yql-utils-simd
)
target_link_options(ydb-library-yql-utils-simd-ut PRIVATE
-Wl,-platform_version,macos,11.0,11.0
diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt
index f43917db826..4c8657d7a5a 100644
--- a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-aarch64.txt
@@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC
contrib-libs-cxxsupp
yutil
cpp-testing-unittest_main
+ yql-utils-simd
)
target_link_options(ydb-library-yql-utils-simd-ut PRIVATE
-ldl
diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt
index 15ff8ed27ab..fc49024e695 100644
--- a/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/utils/simd/ut/CMakeLists.linux-x86_64.txt
@@ -17,6 +17,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC
yutil
library-cpp-cpuid_check
cpp-testing-unittest_main
+ yql-utils-simd
)
target_link_options(ydb-library-yql-utils-simd-ut PRIVATE
-ldl
diff --git a/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt
index 72fe7818c18..66276687c8a 100644
--- a/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/utils/simd/ut/CMakeLists.windows-x86_64.txt
@@ -16,6 +16,7 @@ target_link_libraries(ydb-library-yql-utils-simd-ut PUBLIC
yutil
library-cpp-cpuid_check
cpp-testing-unittest_main
+ yql-utils-simd
)
target_sources(ydb-library-yql-utils-simd-ut PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/simd_ut.cpp
diff --git a/ydb/library/yql/utils/simd/ya.make b/ydb/library/yql/utils/simd/ya.make
index e6a05de3592..65a3367ca50 100644
--- a/ydb/library/yql/utils/simd/ya.make
+++ b/ydb/library/yql/utils/simd/ya.make
@@ -1,19 +1,10 @@
-EXECTEST()
+LIBRARY()
-RUN(
- stream_store
-)
-
-DEPENDS(
- ydb/library/yql/utils/simd/exec/stream_store
-)
+PEERDIR(library/cpp/digest/crc32c)
END()
-RECURSE(
- exec
-)
-
RECURSE_FOR_TESTS(
ut
+ exec
) \ No newline at end of file