diff options
author | fixthgame <fixthgame@yandex-team.com> | 2023-10-12 18:09:19 +0300 |
---|---|---|
committer | fixthgame <fixthgame@yandex-team.com> | 2023-10-12 18:47:49 +0300 |
commit | deabfde11b4514e0221cb5fac13b072f152ccd8f (patch) | |
tree | 096e3b94493f09d56f738a4638636ae8378a483a | |
parent | bba1559e3cdc2fa94aa9faeeaf8fb7323a6b060f (diff) | |
download | ydb-deabfde11b4514e0221cb5fac13b072f152ccd8f.tar.gz |
SIMD Реализации для Westmere и Haswell
Unit test for SIMM
Westmere and Haswell base.h
Westmere and Haswell Simd
12 files changed, 823 insertions, 0 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/avx2/begin.h b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/begin.h new file mode 100644 index 00000000000..81d8f3ca346 --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/begin.h @@ -0,0 +1,3 @@ +#pragma clang attribute push(__attribute__((target("avx2"))), apply_to=function) + +#include "simd.h"
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/avx2/end.h b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/end.h new file mode 100644 index 00000000000..fcef763036d --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/end.h @@ -0,0 +1 @@ +#pragma clang attribute pop
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/avx2/simd.h b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/simd.h new file mode 100644 index 00000000000..45beff01d33 --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/avx2/simd.h @@ -0,0 +1,324 @@ +#pragma once + +#include <cstdint> +#include <immintrin.h> + +namespace NKikimr { +namespace NMiniKQL { +namespace NBlockJoin { +namespace NAVX2 { +namespace NSIMD { + +template <typename T> +struct TSimd8; + +template<typename Child> +struct TBase { + __m256i Value; + + inline TBase() + : Value{__m256i()} { + } + + inline TBase(const __m256i value) + : Value(value) { + } + + inline operator const __m256i&() const { + return this->Value; + } + inline operator __m256i&() { + return this->Value; + } + + inline Child operator|(const Child other) const { + return _mm256_or_si256(*this, other); + } + inline Child operator&(const Child other) const { + return _mm256_and_si256(*this, other); + } + inline Child operator^(const Child other) const { + return _mm256_xor_si256(*this, other); + } + inline Child BitAndNot(const Child other) const { + return _mm256_andnot_si256(*this, other); + }; + inline Child& operator|=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast | other; + return *cast; + } + inline Child& operator&=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast & other; + return *cast; + }; + inline Child& operator^=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast ^ other; + return *cast; + }; +}; + +template<typename T, typename Mask=TSimd8<bool>> +struct TBase8: TBase<TSimd8<T>> { + + inline TBase8() + : TBase<TSimd8<T>>() + { + } + + inline TBase8(const __m256i _value) + : TBase<TSimd8<T>>(_value) + { + } + + friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) { + return _mm256_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(TBase<T>::Value); +}; + +template<> +struct TSimd8<bool>: TBase8<bool> { + + inline TSimd8<bool>() + : TBase8() + { + } + + inline TSimd8<bool>(const __m256i value) + : TBase8<bool>(value) + { + } + + inline TSimd8<bool>(bool value) + : TBase8<bool>(Set(value)) + { + } + + static inline TSimd8<bool> Set(bool value) { + return _mm256_set1_epi8(ui8(-(!!value))); + } + + inline bool Any() const { + return !_mm256_testz_si256(*this, *this); + } + + inline TSimd8<bool> operator~() const { + return *this ^ true; + } +}; + +template<typename T> +struct TBase8Numeric: TBase8<T> { + + inline TBase8Numeric() + : TBase8<T>() + { + } + inline TBase8Numeric(const __m256i value) + : TBase8<T>(value) + { + } + + static inline TSimd8<T> Set(T value) { + return _mm256_set1_epi8(value); + } + static inline TSimd8<T> Zero() { + return _mm256_setzero_si256(); + } + static inline TSimd8<T> Load(const T values[32]) { + return _mm256_loadu_si256(reinterpret_cast<const __m256i *>(values)); + } + + static inline TSimd8<T> Repeat16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return TSimd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline void Store(T dst[32]) const { + return _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst), *this); + } + + inline TSimd8<T> operator+(const TSimd8<T> other) const { + return _mm256_add_epi8(*this, other); + } + inline TSimd8<T> operator-(const TSimd8<T> other) const { + return _mm256_sub_epi8(*this, other); + } + inline TSimd8<T>& operator+=(const TSimd8<T> other) { + *this = *this + other; + return *static_cast<TSimd8<T>*>(this); + } + inline TSimd8<T>& operator-=(const TSimd8<T> other) { + *this = *this - other; + return *static_cast<TSimd8<T>*>(this); + } + + // 0xFFu = 11111111 = 2^8 - 1 + inline TSimd8<T> operator~() const { + return *this ^ 0xFFu; + } +}; + +template<> +struct TSimd8<i8> : TBase8Numeric<i8> { + inline TSimd8() + : TBase8Numeric<i8>() + { + } + inline TSimd8(const __m256i value) + : TBase8Numeric<i8>(value) + { + } + inline TSimd8(i8 value) + : TSimd8(Set(value)) + { + } + inline TSimd8(const i8 values[32]) + : TSimd8(Load(values)) + { + } + inline TSimd8( + i8 v0, i8 v1, i8 v2, i8 v3, i8 v4, i8 v5, i8 v6, i8 v7, + i8 v8, i8 v9, i8 v10, i8 v11, i8 v12, i8 v13, i8 v14, i8 v15, + i8 v16, i8 v17, i8 v18, i8 v19, i8 v20, i8 v21, i8 v22, i8 v23, + i8 v24, i8 v25, i8 v26, i8 v27, i8 v28, i8 v29, i8 v30, i8 v31 + ) : TSimd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) + { + } + + inline static TSimd8<i8> Repeat16( + i8 v0, i8 v1, i8 v2, i8 v3, i8 v4, i8 v5, i8 v6, i8 v7, + i8 v8, i8 v9, i8 v10, i8 v11, i8 v12, i8 v13, i8 v14, i8 v15 + ) { + return TSimd8<i8>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline TSimd8<i8> MaxValue(const TSimd8<i8> other) const { + return _mm256_max_epi8(*this, other); + } + inline TSimd8<i8> MinValue(const TSimd8<i8> other) const { + return _mm256_min_epi8(*this, other); + } + inline TSimd8<bool> operator>(const TSimd8<i8> other) const { + return _mm256_cmpgt_epi8(*this, other); + } + inline TSimd8<bool> operator<(const TSimd8<i8> other) const { + return _mm256_cmpgt_epi8(other, *this); + } +}; + +template<> +struct TSimd8<ui8>: TBase8Numeric<ui8> { + inline TSimd8() + : TBase8Numeric<ui8>() + { + } + inline TSimd8(const __m256i _value) + : TBase8Numeric<ui8>(_value) + { + } + inline TSimd8(ui8 _value) + : TSimd8(Set(_value)) + { + } + inline TSimd8(const ui8 values[32]) + : TSimd8(Load(values)) + { + } + inline TSimd8( + ui8 v0, ui8 v1, ui8 v2, ui8 v3, ui8 v4, ui8 v5, ui8 v6, ui8 v7, + ui8 v8, ui8 v9, ui8 v10, ui8 v11, ui8 v12, ui8 v13, ui8 v14, ui8 v15, + ui8 v16, ui8 v17, ui8 v18, ui8 v19, ui8 v20, ui8 v21, ui8 v22, ui8 v23, + ui8 v24, ui8 v25, ui8 v26, ui8 v27, ui8 v28, ui8 v29, ui8 v30, ui8 v31 + ) : TSimd8(_mm256_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v16,v17,v18,v19,v20,v21,v22,v23, + v24,v25,v26,v27,v28,v29,v30,v31 + )) {} + + inline static TSimd8<ui8> Repeat16( + ui8 v0, ui8 v1, ui8 v2, ui8 v3, ui8 v4, ui8 v5, ui8 v6, ui8 v7, + ui8 v8, ui8 v9, ui8 v10, ui8 v11, ui8 v12, ui8 v13, ui8 v14, ui8 v15 + ) { + return TSimd8<ui8>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15, + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline TSimd8<ui8> MaxValue(const TSimd8<ui8> other) const { + return _mm256_max_epu8(*this, other); + } + inline TSimd8<ui8> MinValue(const TSimd8<ui8> other) const { + return _mm256_min_epu8(other, *this); + } + inline TSimd8<bool> operator<=(const TSimd8<ui8> other) const { + return other.MaxValue(*this) == other; + } + inline TSimd8<bool> operator>=(const TSimd8<ui8> other) const { + return other.MinValue(*this) == other; + } + + inline TSimd8<bool> BitsNotSet() const { + return *this == ui8(0); + } + inline TSimd8<bool> AnyBitsSet() const { + return ~this->BitsNotSet(); + } + inline bool BitsNotSetAnywhere() const { + return _mm256_testz_si256(*this, *this); + } + inline bool AnyBitsSetAnywhere() const { + return !BitsNotSetAnywhere(); + } + inline bool BitsNotSetAnywhere(TSimd8<ui8> bits) const { + return _mm256_testz_si256(*this, bits); + } + inline bool AnyBitsSetAnywhere(TSimd8<ui8> bits) const { + return !BitsNotSetAnywhere(bits); + } + + template<int N> + inline TSimd8<ui8> Shr() const { + return TSimd8<ui8>(_mm256_srli_epi16(*this, N)) & ui8(0xFFu >> N); + } + template<int N> + inline TSimd8<ui8> Shl() const { + return TSimd8<ui8>(_mm256_slli_epi16(*this, N)) & ui8(0xFFu << N); + } + + template<int N> + inline int GetBit() const { + return _mm256_movemask_epi8(_mm256_slli_epi16(*this, 7-N)); + } +}; + +} +} +} +} +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/sse42/begin.h b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/begin.h new file mode 100644 index 00000000000..00fffa0930b --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/begin.h @@ -0,0 +1,3 @@ +#pragma clang attribute push(__attribute__((target("sse4.2"))), apply_to=function) + +#include "simd.h"
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/sse42/end.h b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/end.h new file mode 100644 index 00000000000..fcef763036d --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/end.h @@ -0,0 +1 @@ +#pragma clang attribute pop
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/block_join/sse42/simd.h b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/simd.h new file mode 100644 index 00000000000..71201ce40a7 --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/block_join/sse42/simd.h @@ -0,0 +1,310 @@ +#pragma once + +#include <cstdint> +#include <immintrin.h> + +namespace NKikimr { +namespace NMiniKQL { +namespace NBlockJoin { +namespace NSSE42 { +namespace NSIMD { + +template <typename T> +struct TSimd8; + +template<typename Child> +struct TBase { + __m128i Value; + + inline TBase() + : Value{__m128i()} { + } + + inline TBase(const __m128i value) + : Value(value) { + } + + inline operator const __m128i&() const { + return this->Value; + } + inline operator __m128i&() { + return this->Value; + } + + inline Child operator|(const Child other) const { + return _mm_or_si128(*this, other); + } + inline Child operator&(const Child other) const { + return _mm_and_si128(*this, other); + } + inline Child operator^(const Child other) const { + return _mm_xor_si128(*this, other); + } + inline Child BitAndNot(const Child other) const { + return _mm_andnot_si128(*this, other); + }; + inline Child& operator|=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast | other; + return *cast; + } + inline Child& operator&=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast & other; + return *cast; + }; + inline Child& operator^=(const Child other) { + auto cast = static_cast<Child*>(*this); + *cast = *cast ^ other; + return *cast; + }; +}; + +template<typename T, typename Mask=TSimd8<bool>> +struct TBase8: TBase<TSimd8<T>> { + + inline TBase8() + : TBase<TSimd8<T>>() + { + } + + inline TBase8(const __m128i _value) + : TBase<TSimd8<T>>(_value) + { + } + + friend inline Mask operator==(const TSimd8<T> lhs, const TSimd8<T> rhs) { + return _mm_cmpeq_epi8(lhs, rhs); + } + + static const int SIZE = sizeof(TBase<T>::Value); +}; + +template<> +struct TSimd8<bool>: TBase8<bool> { + + inline TSimd8<bool>() + : TBase8() + { + } + + inline TSimd8<bool>(const __m128i value) + : TBase8<bool>(value) + { + } + + inline TSimd8<bool>(bool value) + : TBase8<bool>(Set(value)) + { + } + + static inline TSimd8<bool> Set(bool value) { + return _mm_set1_epi8(ui8(-(!!value))); + } + + inline bool Any() const { + return !_mm_testz_si128(*this, *this); + } + + inline TSimd8<bool> operator~() const { + return *this ^ true; + } +}; + +template<typename T> +struct TBase8Numeric: TBase8<T> { + + inline TBase8Numeric() + : TBase8<T>() + { + } + inline TBase8Numeric(const __m128i value) + : TBase8<T>(value) + { + } + + static inline TSimd8<T> Set(T value) { + return _mm_set1_epi8(value); + } + static inline TSimd8<T> Zero() { + return _mm_setzero_si128(); + } + static inline TSimd8<T> Load(const T values[16]) { + return _mm_loadu_si128(reinterpret_cast<const __m128i *>(values)); + } + + static inline TSimd8<T> Repeat16( + T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, + T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15 + ) { + return TSimd8<T>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline void Store(T dst[16]) const { + return _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), *this); + } + + inline TSimd8<T> operator+(const TSimd8<T> other) const { + return _mm_add_epi8(*this, other); + } + inline TSimd8<T> operator-(const TSimd8<T> other) const { + return _mm_sub_epi8(*this, other); + } + inline TSimd8<T>& operator+=(const TSimd8<T> other) { + *this = *this + other; + return *static_cast<TSimd8<T>*>(this); + } + inline TSimd8<T>& operator-=(const TSimd8<T> other) { + *this = *this - other; + return *static_cast<TSimd8<T>*>(this); + } + + // 0xFFu = 11111111 = 2^8 - 1 + inline TSimd8<T> operator~() const { + return *this ^ 0xFFu; + } +}; + +template<> +struct TSimd8<i8> : TBase8Numeric<i8> { + inline TSimd8() + : TBase8Numeric<i8>() + { + } + inline TSimd8(const __m128i value) + : TBase8Numeric<i8>(value) + { + } + inline TSimd8(i8 value) + : TSimd8(Set(value)) + { + } + inline TSimd8(const i8 values[16]) + : TSimd8(Load(values)) + { + } + inline TSimd8( + i8 v0, i8 v1, i8 v2, i8 v3, i8 v4, i8 v5, i8 v6, i8 v7, + i8 v8, i8 v9, i8 v10, i8 v11, i8 v12, i8 v13, i8 v14, i8 v15 + ) : TSimd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) + { + } + + inline static TSimd8<i8> Repeat16( + i8 v0, i8 v1, i8 v2, i8 v3, i8 v4, i8 v5, i8 v6, i8 v7, + i8 v8, i8 v9, i8 v10, i8 v11, i8 v12, i8 v13, i8 v14, i8 v15 + ) { + return TSimd8<i8>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline TSimd8<i8> MaxValue(const TSimd8<i8> other) const { + return _mm_max_epi8(*this, other); + } + inline TSimd8<i8> MinValue(const TSimd8<i8> other) const { + return _mm_min_epi8(*this, other); + } + inline TSimd8<bool> operator>(const TSimd8<i8> other) const { + return _mm_cmpgt_epi8(*this, other); + } + inline TSimd8<bool> operator<(const TSimd8<i8> other) const { + return _mm_cmpgt_epi8(other, *this); + } +}; + +template<> +struct TSimd8<ui8>: TBase8Numeric<ui8> { + inline TSimd8() + : TBase8Numeric<ui8>() + { + } + inline TSimd8(const __m128i _value) + : TBase8Numeric<ui8>(_value) + { + } + inline TSimd8(ui8 _value) + : TSimd8(Set(_value)) + { + } + inline TSimd8(const ui8 values[16]) + : TSimd8(Load(values)) + { + } + inline TSimd8( + ui8 v0, ui8 v1, ui8 v2, ui8 v3, ui8 v4, ui8 v5, ui8 v6, ui8 v7, + ui8 v8, ui8 v9, ui8 v10, ui8 v11, ui8 v12, ui8 v13, ui8 v14, ui8 v15 + ) : TSimd8(_mm_setr_epi8( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + )) {} + + inline static TSimd8<ui8> Repeat16( + ui8 v0, ui8 v1, ui8 v2, ui8 v3, ui8 v4, ui8 v5, ui8 v6, ui8 v7, + ui8 v8, ui8 v9, ui8 v10, ui8 v11, ui8 v12, ui8 v13, ui8 v14, ui8 v15 + ) { + return TSimd8<ui8>( + v0, v1, v2, v3, v4, v5, v6, v7, + v8, v9, v10,v11,v12,v13,v14,v15 + ); + } + + inline TSimd8<ui8> MaxValue(const TSimd8<ui8> other) const { + return _mm_max_epu8(*this, other); + } + inline TSimd8<ui8> MinValue(const TSimd8<ui8> other) const { + return _mm_min_epu8(other, *this); + } + inline TSimd8<bool> operator<=(const TSimd8<ui8> other) const { + return other.MaxValue(*this) == other; + } + inline TSimd8<bool> operator>=(const TSimd8<ui8> other) const { + return other.MinValue(*this) == other; + } + + inline TSimd8<bool> BitsNotSet() const { + return *this == ui8(0); + } + inline TSimd8<bool> AnyBitsSet() const { + return ~this->BitsNotSet(); + } + inline bool BitsNotSetAnywhere() const { + return _mm_testz_si128(*this, *this); + } + inline bool AnyBitsSetAnywhere() const { + return !BitsNotSetAnywhere(); + } + inline bool BitsNotSetAnywhere(TSimd8<ui8> bits) const { + return _mm_testz_si128(*this, bits); + } + inline bool AnyBitsSetAnywhere(TSimd8<ui8> bits) const { + return !BitsNotSetAnywhere(bits); + } + + template<int N> + inline TSimd8<ui8> Shr() const { + return TSimd8<ui8>(_mm_srli_epi16(*this, N)) & ui8(0xFFu >> N); + } + template<int N> + inline TSimd8<ui8> Shl() const { + return TSimd8<ui8>(_mm_slli_epi16(*this, N)) & ui8(0xFFu << N); + } + + template<int N> + inline int GetBit() const { + return _mm_movemask_epi8(_mm_slli_epi16(*this, 7-N)); + } +}; + +} +} +} +} +}
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt index 8d2f0c72220..46f14d57bbc 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.darwin-x86_64.txt @@ -66,6 +66,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_variant_ut.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt index 36c36db8dd0..51201f10e6c 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-aarch64.txt @@ -69,6 +69,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_variant_ut.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt index 3ba4890e0ce..8a15c2fc377 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.linux-x86_64.txt @@ -70,6 +70,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_variant_ut.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt index e7c59de3ad1..d2c5ed73e1e 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/minikql/comp_nodes/ut/CMakeLists.windows-x86_64.txt @@ -59,6 +59,7 @@ target_sources(ydb-library-yql-minikql-comp_nodes-ut PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_match_recognize_nfa_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_safe_circular_buffer_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_sort_ut.cpp + ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_switch_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_todict_ut.cpp ${CMAKE_SOURCE_DIR}/ydb/library/yql/minikql/comp_nodes/ut/mkql_variant_ut.cpp diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp new file mode 100644 index 00000000000..70ebcb8a22c --- /dev/null +++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_simd_ut.cpp @@ -0,0 +1,176 @@ +#include <library/cpp/testing/unittest/registar.h> +#include <util/system/cpu_id.h> + +#if __AVX2__ +#include <ydb/library/yql/minikql/comp_nodes/block_join/avx2/begin.h> +Y_UNIT_TEST_SUITE(TMiniKQLBlockJoinHaswell) { + using namespace NKikimr::NMiniKQL::NBlockJoin::NAVX2::NSIMD; + Y_UNIT_TEST(SimdBool) { + TSimd8<bool> tr(true); + TSimd8<bool> fal(false); + UNIT_ASSERT_EQUAL(tr.Any(), true); + UNIT_ASSERT_EQUAL(fal.Any(), false); + UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any()); + UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any()); + UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any()); + UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any()); + UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any()); + UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any()); + + TSimd8<bool> bit_or = tr | fal; + UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any()); + + TSimd8<bool> tr_m(_mm256_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1)); + UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any()); + } + Y_UNIT_TEST(SimdUInt) { + __m256i x = _mm256_set1_epi8(0U); + uint8_t arr[32]; + for (auto &i : arr) { + i = 0; + } + TSimd8<uint8_t> a(x), b(arr), c(uint8_t(0)); + UNIT_ASSERT_EQUAL((a == b).Any(), true); + UNIT_ASSERT_EQUAL((b == c).Any(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<uint8_t>::Zero()).Any(), true); + + a = TSimd8<uint8_t>(uint8_t(50)); + b = TSimd8<uint8_t>(uint8_t(49)); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + + UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true); + UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false); + UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true); + + + TSimd8<uint8_t> a2(uint8_t(100)); + TSimd8<uint8_t> a3(uint8_t(25)); + UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true); + UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true); + UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true); + } + + Y_UNIT_TEST(SimdInt) { + __m256i x = _mm256_set1_epi8(0); + int8_t arr[32]; + for (auto &i : arr) { + i = 0; + } + TSimd8<int8_t> a(x), b(arr), c(int8_t(0)); + UNIT_ASSERT_EQUAL((a == b).Any(), true); + UNIT_ASSERT_EQUAL((b == c).Any(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<int8_t>::Zero()).Any(), true); + + a = TSimd8<int8_t>(int8_t(50)); + b = TSimd8<int8_t>(int8_t(49)); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + + + TSimd8<int8_t> a2(int8_t(5)); + TSimd8<int8_t> a3(int8_t(25)); + a = TSimd8<int8_t>(int8_t(15)); + b = TSimd8<int8_t>(int8_t(10)); + UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true); + UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true); + } +} + +#include <ydb/library/yql/minikql/comp_nodes/block_join/avx2/end.h> +#else +Y_UNIT_TEST_SUITE(TMiniKQLBlockJoinHaswell) { + Y_UNIT_TEST(SimdBool) {} + Y_UNIT_TEST(SimdUInt) {} + Y_UNIT_TEST(SimdInt) {} +} +#endif + +#if __SSE4_2__ +#include <ydb/library/yql/minikql/comp_nodes/block_join/sse42/begin.h> +Y_UNIT_TEST_SUITE(TMiniKQLBlockJoinWestmere) { + using namespace NKikimr::NMiniKQL::NBlockJoin::NSSE42::NSIMD; + Y_UNIT_TEST(SimdBool) { + + TSimd8<bool> tr(true); + TSimd8<bool> fal(false); + UNIT_ASSERT_EQUAL(tr.Any(), true); + UNIT_ASSERT_EQUAL(fal.Any(), false); + UNIT_ASSERT_UNEQUAL(tr.Any(), fal.Any()); + UNIT_ASSERT_EQUAL(tr.Any(), (tr ^ fal).Any()); + UNIT_ASSERT_EQUAL(fal.Any(), (tr ^ tr).Any()); + UNIT_ASSERT_EQUAL(fal.Any(), (tr & fal).Any()); + UNIT_ASSERT_EQUAL((~tr).Any(), fal.Any()); + UNIT_ASSERT_EQUAL((~fal).Any(), tr.Any()); + + TSimd8<bool> bit_or = tr | fal; + UNIT_ASSERT_EQUAL(bit_or.Any(), tr.Any()); + + TSimd8<bool> tr_m(_mm_set_epi32(-1, -1, -1, -1)); + UNIT_ASSERT_EQUAL((tr_m == tr).Any(), TSimd8<bool>(true).Any()); + } + Y_UNIT_TEST(SimdUInt) { + __m128i x = _mm_set1_epi8(0U); + uint8_t arr[16]; + for (auto &i : arr) { + i = 0; + } + TSimd8<uint8_t> a(x), b(arr), c(uint8_t(0)); + UNIT_ASSERT_EQUAL((a == b).Any(), true); + UNIT_ASSERT_EQUAL((b == c).Any(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<uint8_t>::Zero()).Any(), true); + + a = TSimd8<uint8_t>(uint8_t(50)); + b = TSimd8<uint8_t>(uint8_t(49)); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + + UNIT_ASSERT_EQUAL(c.BitsNotSet().Any(), true); + UNIT_ASSERT_EQUAL(a.BitsNotSet().Any(), false); + UNIT_ASSERT_EQUAL(a.AnyBitsSet().Any(), true); + + + TSimd8<uint8_t> a2(uint8_t(100)); + TSimd8<uint8_t> a3(uint8_t(25)); + UNIT_ASSERT_EQUAL((a.Shl<1>() == a2).Any(), true); + UNIT_ASSERT_EQUAL((a.Shr<1>() == a3).Any(), true); + UNIT_ASSERT_EQUAL((a.Shr<8>() == c).Any(), true); + } + + Y_UNIT_TEST(SimdInt) { + __m128i x = _mm_set1_epi8(0); + int8_t arr[16]; + for (auto &i : arr) { + i = 0; + } + TSimd8<int8_t> a(x), b(arr), c(int8_t(0)); + UNIT_ASSERT_EQUAL((a == b).Any(), true); + UNIT_ASSERT_EQUAL((b == c).Any(), true); + UNIT_ASSERT_EQUAL((c == TSimd8<int8_t>::Zero()).Any(), true); + + a = TSimd8<int8_t>(int8_t(50)); + b = TSimd8<int8_t>(int8_t(49)); + UNIT_ASSERT_EQUAL((a.MaxValue(b) == a).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == b).Any(), true); + UNIT_ASSERT_EQUAL((a.MinValue(b) == a).Any(), false); + + + TSimd8<int8_t> a2(int8_t(5)); + TSimd8<int8_t> a3(int8_t(25)); + a = TSimd8<int8_t>(int8_t(15)); + b = TSimd8<int8_t>(int8_t(10)); + UNIT_ASSERT_EQUAL(((a + b) == a3).Any(), true); + UNIT_ASSERT_EQUAL(((a - b) == a2).Any(), true); + } +} +#include <ydb/library/yql/minikql/comp_nodes/block_join/sse42/end.h> +#else +Y_UNIT_TEST_SUITE(TMiniKQLBlockJoinWestmere) { + Y_UNIT_TEST(SimdBool) {} + Y_UNIT_TEST(SimdUInt) {} + Y_UNIT_TEST(SimdInt) {} +} +#endif
\ No newline at end of file diff --git a/ydb/library/yql/minikql/comp_nodes/ut/ya.make b/ydb/library/yql/minikql/comp_nodes/ut/ya.make index a883fed1a10..9d7ad954076 100644 --- a/ydb/library/yql/minikql/comp_nodes/ut/ya.make +++ b/ydb/library/yql/minikql/comp_nodes/ut/ya.make @@ -48,6 +48,7 @@ SRCS( mkql_match_recognize_nfa_ut.cpp mkql_safe_circular_buffer_ut.cpp mkql_sort_ut.cpp + mkql_simd_ut.cpp mkql_switch_ut.cpp mkql_todict_ut.cpp mkql_variant_ut.cpp |