aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfixthgame <fixthgame@yandex-team.com>2023-11-03 14:17:02 +0300
committerfixthgame <fixthgame@yandex-team.com>2023-11-03 14:43:57 +0300
commit95fcb69e4e7b744eed6fcf641bb0161dffbbfcaa (patch)
tree324589b8ccc975df6c4e182468722c05052d3a4e
parent60401e3c6fa09feb6f663b912d54566231eb0ef3 (diff)
downloadydb-95fcb69e4e7b744eed6fcf641bb0161dffbbfcaa.tar.gz
TuplePackTest
more logs add test
-rw-r--r--.mapping.json5
-rw-r--r--ydb/library/yql/utils/simd/exec/CMakeLists.txt1
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.darwin-x86_64.txt31
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-aarch64.txt36
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-x86_64.txt38
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.txt17
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.windows-x86_64.txt26
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp175
-rw-r--r--ydb/library/yql/utils/simd/exec/pack_tuple/ya.make9
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt1
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt1
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/main.cpp36
-rw-r--r--ydb/library/yql/utils/simd/exec/stream_store/ya.make2
-rw-r--r--ydb/library/yql/utils/simd/exec/ya.make6
16 files changed, 375 insertions, 11 deletions
diff --git a/.mapping.json b/.mapping.json
index 808cc52993..027f520e1f 100644
--- a/.mapping.json
+++ b/.mapping.json
@@ -8790,6 +8790,11 @@
"ydb/library/yql/utils/simd/CMakeLists.txt":"",
"ydb/library/yql/utils/simd/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/utils/simd/exec/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.darwin-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-aarch64.txt":"",
+ "ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-x86_64.txt":"",
+ "ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.txt":"",
+ "ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.windows-x86_64.txt":"",
"ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt":"",
"ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt":"",
"ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt":"",
diff --git a/ydb/library/yql/utils/simd/exec/CMakeLists.txt b/ydb/library/yql/utils/simd/exec/CMakeLists.txt
index 1949327f1e..a3b4da0bea 100644
--- a/ydb/library/yql/utils/simd/exec/CMakeLists.txt
+++ b/ydb/library/yql/utils/simd/exec/CMakeLists.txt
@@ -6,4 +6,5 @@
# original buildsystem will not be accepted.
+add_subdirectory(pack_tuple)
add_subdirectory(stream_store)
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.darwin-x86_64.txt
new file mode 100644
index 0000000000..ae045da3a7
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.darwin-x86_64.txt
@@ -0,0 +1,31 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pack_tuple)
+target_compile_options(pack_tuple PRIVATE
+ -mavx2
+)
+target_link_libraries(pack_tuple PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-utils-simd
+)
+target_link_options(pack_tuple PRIVATE
+ -Wl,-platform_version,macos,11.0,11.0
+ -fPIC
+ -fPIC
+)
+target_sources(pack_tuple PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
+)
+target_allocator(pack_tuple
+ system_allocator
+)
+vcs_info(pack_tuple)
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-aarch64.txt
new file mode 100644
index 0000000000..2d36690667
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-aarch64.txt
@@ -0,0 +1,36 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pack_tuple)
+target_compile_options(pack_tuple PRIVATE
+ -mavx2
+)
+target_link_libraries(pack_tuple PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ yql-utils-simd
+)
+target_link_options(pack_tuple PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(pack_tuple PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
+)
+target_allocator(pack_tuple
+ cpp-malloc-jemalloc
+)
+vcs_info(pack_tuple)
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-x86_64.txt
new file mode 100644
index 0000000000..6299f9b65a
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.linux-x86_64.txt
@@ -0,0 +1,38 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pack_tuple)
+target_compile_options(pack_tuple PRIVATE
+ -mavx2
+)
+target_link_libraries(pack_tuple PUBLIC
+ contrib-libs-linux-headers
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-utils-simd
+)
+target_link_options(pack_tuple PRIVATE
+ -ldl
+ -lrt
+ -Wl,--no-as-needed
+ -fPIC
+ -fPIC
+ -lpthread
+ -lrt
+ -ldl
+)
+target_sources(pack_tuple PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
+)
+target_allocator(pack_tuple
+ cpp-malloc-tcmalloc
+ libs-tcmalloc-no_percpu_cache
+)
+vcs_info(pack_tuple)
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.txt b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.txt
new file mode 100644
index 0000000000..f8b31df0c1
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.txt
@@ -0,0 +1,17 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+if (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-aarch64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
+ include(CMakeLists.darwin-x86_64.txt)
+elseif (WIN32 AND CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" AND NOT HAVE_CUDA)
+ include(CMakeLists.windows-x86_64.txt)
+elseif (CMAKE_SYSTEM_NAME STREQUAL "Linux" AND CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND NOT HAVE_CUDA)
+ include(CMakeLists.linux-x86_64.txt)
+endif()
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.windows-x86_64.txt
new file mode 100644
index 0000000000..37c13c73e8
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/CMakeLists.windows-x86_64.txt
@@ -0,0 +1,26 @@
+
+# This file was generated by the build system used internally in the Yandex monorepo.
+# Only simple modifications are allowed (adding source-files to targets, adding simple properties
+# like target_include_directories). These modifications will be ported to original
+# ya.make files by maintainers. Any complex modifications which can't be ported back to the
+# original buildsystem will not be accepted.
+
+
+
+add_executable(pack_tuple)
+target_compile_options(pack_tuple PRIVATE
+ -mavx2
+)
+target_link_libraries(pack_tuple PUBLIC
+ contrib-libs-cxxsupp
+ yutil
+ library-cpp-cpuid_check
+ yql-utils-simd
+)
+target_sources(pack_tuple PRIVATE
+ ${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
+)
+target_allocator(pack_tuple
+ system_allocator
+)
+vcs_info(pack_tuple)
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp b/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
new file mode 100644
index 0000000000..ed9a4e0444
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/main.cpp
@@ -0,0 +1,175 @@
+#include <util/generic/ptr.h>
+#include <util/system/cpu_id.h>
+#include <util/system/types.h>
+
+#include <ydb/library/yql/utils/simd/simd.h>
+
+// Benchmark harness that combines a ui32 column and a ui64 column with SIMD
+// shuffles and blends and reports throughput. Workers are created per SIMD
+// trait set through Create<TTraits>().
+struct TPerfomancer {
+    TPerfomancer() = default;
+
+    // Trait-independent interface through which main() drives a worker.
+    struct TWrapWorker {
+        // Runs the benchmark; `log` enables diagnostic output on Cerr.
+        virtual int PackTuple(bool log) = 0;
+        virtual ~TWrapWorker() = default;
+    };
+
+    template<typename TTraits>
+    struct TWorker : TWrapWorker {
+        template<typename T>
+        using TSimd = typename TTraits::template TSimd8<T>;
+        TWorker() = default;
+
+        // Builds a heap-allocated 32-byte mask from eight 32-bit values: byte
+        // (4 * k + b) holds v[k] + b for k in [0, 8) and b in [0, 4).
+        // The caller owns the returned array and must release it with delete[].
+        // NOTE(review): if v[k] is meant to be a 32-bit lane index, the byte index
+        // would normally be 4 * v[k] + b - confirm the intended mask layout.
+        ui8* ShuffleMask(ui32 v[8]) {
+            ui8* mask = new ui8[32];
+            for (size_t lane = 0; lane < 8; ++lane) {
+                for (size_t byte = 0; byte < 4; ++byte) {
+                    mask[4 * lane + byte] = v[lane] + byte;
+                }
+            }
+            return mask;
+        }
+
+        // Runs the pack benchmark over NTuples (ui32, ui64) pairs.
+        //
+        // log - when true, dumps the last register contents, the accumulated
+        //       hashes, the elapsed time and the derived throughput to Cerr.
+        //
+        // Always returns 1; trait sets whose Size is not 32 skip the benchmark.
+        int PackTupleImpl(bool log = true) {
+            // The kernel below is written for Size == 32 only.
+            if (TTraits::Size != 32)
+                return 1;
+            const ui64 NTuples = 32 << 18;
+            const ui64 TupleSize = sizeof(ui32) + sizeof(ui64);
+
+            // Column storage is owned by holders so it is released on every exit path.
+            TArrayHolder<ui32> col1Storage(new ui32[NTuples]);
+            TArrayHolder<ui64> col2Storage(new ui64[NTuples]);
+            ui32* arrUi32 = col1Storage.Get();
+            ui64* arrUi64 = col2Storage.Get();
+
+            // Even values go to the first column, odd values to the second.
+            for (ui32 i = 0; i < NTuples; i++) {
+                arrUi32[i] = 2 * i;
+            }
+
+            for (ui32 i = 0; i < NTuples; i++) {
+                arrUi64[i] = 2 * i + 1;
+            }
+
+            // Loads a shuffle mask into a register and frees the temporary byte
+            // array returned by ShuffleMask, which used to be leaked.
+            const auto loadMask = [this](ui32* lanes) {
+                TArrayHolder<ui8> bytes(this->ShuffleMask(lanes));
+                return TSimd<ui8>(bytes.Get());
+            };
+
+            ui32 lanes11[8] = {0, 0, 0, 0, 1, 0, 0, 0};
+            ui32 lanes12[8] = {0, 0, 0, 0, 3, 0, 0, 2};
+            ui32 lanes1f[8] = {7, 7, 7, 7, 7, 6, 5, 4};
+            ui32 lanes21[8] = {0, 0, 3, 2, 0, 1, 0, 0};
+            ui32 lanes22[8] = {0, 0, 7, 6, 0, 5, 4, 0};
+
+            TSimd<ui8> readReg1, readReg2, readReg1Fwd;
+
+            TSimd<ui8> permReg11, permReg21;
+            TSimd<ui8> permReg12, permReg22;
+
+            TSimd<ui8> permIdx11(loadMask(lanes11));
+            TSimd<ui8> permIdx12(loadMask(lanes12));
+            TSimd<ui8> permIdx1f(loadMask(lanes1f));
+
+            TSimd<ui8> permIdx21(loadMask(lanes21));
+            TSimd<ui8> permIdx22(loadMask(lanes22));
+
+            ui32 val1[8], val2[8];
+
+            using TReg = typename TTraits::TRegister;
+            TSimd<ui8> blended1, blended2;
+
+            TReg *addr1 = (TReg*) arrUi32;
+            TReg *addr2 = (TReg*) arrUi64;
+
+            std::chrono::steady_clock::time_point begin01 =
+                std::chrono::steady_clock::now();
+
+            ui64 accum1 = 0;
+            ui64 accum2 = 0;
+            ui64 accum3 = 0;
+            ui64 accum4 = 0;
+
+            const int blendMask = 0b00110110;
+
+            ui32 hash1 = 0;
+            ui32 hash2 = 0;
+            ui32 hash3 = 0;
+            ui32 hash4 = 0;
+
+            // Each outer iteration reads one register from the ui32 column and,
+            // across the two inner iterations, two registers from the ui64 column.
+            for (ui32 i = 0; i < NTuples; i += 8) {
+                readReg1 = TSimd<ui8>((ui8*) addr1);
+                for (ui32 j = 0; j < 2; j++) {
+
+                    permReg11 = readReg1.Shuffle(permIdx11);
+                    readReg2 = TSimd<ui8>((ui8*) addr2);
+                    addr2++;
+                    permReg21 = readReg2.Shuffle(permIdx21);
+                    blended1 = permReg11.template Blend16<blendMask>(permReg21);
+                    blended1.Store((ui8*) val1);
+
+                    hash1 = TSimd<ui8>::CRC32u32(0, val1[0]);
+                    hash2 = TSimd<ui8>::CRC32u32(0, val1[3]);
+
+                    accum1 += hash1;
+                    accum2 += hash2;
+
+                    permReg12 = readReg1.Shuffle(permIdx12);
+                    permReg22 = readReg2.Shuffle(permIdx22);
+                    // Blend the first-column permutation with the second-column one;
+                    // blending permReg12 with itself left permReg22 unused.
+                    blended2 = permReg12.template Blend16<blendMask>(permReg22);
+                    blended2.Store((ui8*) val2);
+
+                    hash3 = TSimd<ui8>::CRC32u32(0, val2[0]);
+                    hash4 = TSimd<ui8>::CRC32u32(0, val2[3]);
+
+                    accum3 += hash3;
+                    accum4 += hash4;
+
+                    // Overwrite readReg1 in place with its permIdx1f permutation
+                    // before the second inner iteration.
+                    readReg1Fwd = readReg1.Shuffle(permIdx1f);
+                    readReg1Fwd.Store((ui8*) &readReg1.Value);
+
+                }
+                addr1++;
+            }
+
+            // Stop the clock before any logging so that I/O is not measured.
+            std::chrono::steady_clock::time_point end01 =
+                std::chrono::steady_clock::now();
+
+            ui64 microseconds =
+                std::chrono::duration_cast<std::chrono::microseconds>(end01 - begin01).count();
+            if (log) {
+                Cerr << "Loaded col1 ";
+                readReg1.template Log<ui32>(Cerr);
+                Cerr << "Loaded col2 ";
+                readReg2.template Log<ui32>(Cerr);
+                Cerr << "Permuted col1 ";
+                permReg11.template Log<ui32>(Cerr);
+                Cerr << "Permuted col2 ";
+                permReg21.template Log<ui32>(Cerr);
+                Cerr << "Blended ";
+                blended1.template Log<ui32>(Cerr);
+
+                Cerr << "Accum 1 2 hash: " << accum1 << " " << accum2 << " " << accum3 << " " << accum4 << " "
+                     << hash1 << " " << hash2 << " " << hash3 << " " << hash4 << Endl;
+                Cerr << "Time for stream load = " << microseconds << "[microseconds]"
+                     << Endl;
+                Cerr << "Data size = " << ((NTuples * TupleSize) / (1024 * 1024))
+                     << " [MB]" << Endl;
+                // microseconds + 1 guards against division by zero.
+                Cerr << "Stream load/save/accum speed = "
+                     << (NTuples * TupleSize * 1000 * 1000) /
+                            (1024 * 1024 * (microseconds + 1))
+                     << " MB/sec" << Endl;
+                Cerr << Endl;
+            }
+
+            return 1;
+        }
+
+        // TWrapWorker entry point; forwards to PackTupleImpl.
+        int PackTuple(bool log = true) override {
+            return PackTupleImpl(log);
+        }
+
+        ~TWorker() = default;
+    };
+
+    // Creates a worker specialised for the given SIMD trait set.
+    template<typename TTraits>
+    THolder<TWrapWorker> Create() const {
+        return MakeHolder<TWorker<TTraits>>();
+    }
+};
+
+// Runs the pack-tuple benchmark with the AVX2 trait set.
+// Exits with 0 when AVX2 is unavailable or the benchmark reports success.
+int main() {
+    const bool hasAvx2 = NX86::HaveAVX2();
+    if (!hasAvx2) {
+        return 0;
+    }
+
+    TPerfomancer perfomancer;
+    auto avx2Worker = perfomancer.Create<NSimd::TSimdAVX2Traits>();
+
+    // Accumulate the result the same way further checks would be added.
+    bool allPassed = true;
+    allPassed &= avx2Worker->PackTuple(true);
+    return allPassed ? 0 : 1;
+} \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/pack_tuple/ya.make b/ydb/library/yql/utils/simd/exec/pack_tuple/ya.make
new file mode 100644
index 0000000000..61c6742c2a
--- /dev/null
+++ b/ydb/library/yql/utils/simd/exec/pack_tuple/ya.make
@@ -0,0 +1,9 @@
+PROGRAM()
+
+SRCS(main.cpp)
+
+CFLAGS(-mavx2)
+
+PEERDIR(ydb/library/yql/utils/simd)
+
+END() \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt
index a99ac27d6f..e5df3ab627 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.darwin-x86_64.txt
@@ -12,6 +12,7 @@ target_link_libraries(stream_store PUBLIC
contrib-libs-cxxsupp
yutil
library-cpp-cpuid_check
+ yql-utils-simd
)
target_link_options(stream_store PRIVATE
-Wl,-platform_version,macos,11.0,11.0
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt
index 049d3baab7..d4dd5985c4 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-aarch64.txt
@@ -12,6 +12,7 @@ target_link_libraries(stream_store PUBLIC
contrib-libs-linux-headers
contrib-libs-cxxsupp
yutil
+ yql-utils-simd
)
target_link_options(stream_store PRIVATE
-ldl
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt
index cbf6c43e02..2dbcba4a9e 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.linux-x86_64.txt
@@ -13,6 +13,7 @@ target_link_libraries(stream_store PUBLIC
contrib-libs-cxxsupp
yutil
library-cpp-cpuid_check
+ yql-utils-simd
)
target_link_options(stream_store PRIVATE
-ldl
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt
index a00f4d06e7..c5757a9974 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt
+++ b/ydb/library/yql/utils/simd/exec/stream_store/CMakeLists.windows-x86_64.txt
@@ -12,6 +12,7 @@ target_link_libraries(stream_store PUBLIC
contrib-libs-cxxsupp
yutil
library-cpp-cpuid_check
+ yql-utils-simd
)
target_sources(stream_store PRIVATE
${CMAKE_SOURCE_DIR}/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/main.cpp b/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
index 4a25a86852..b879bf6fd7 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
+++ b/ydb/library/yql/utils/simd/exec/stream_store/main.cpp
@@ -1,4 +1,3 @@
-
#include <util/generic/ptr.h>
#include <util/system/cpu_id.h>
#include <util/system/types.h>
@@ -9,7 +8,7 @@ struct TPerfomancer {
TPerfomancer() = default;
struct TWrapWorker {
- virtual int StreamLoad(bool log) = 0;
+ virtual int StoreStream(bool log) = 0;
virtual ~TWrapWorker() = default;
};
@@ -29,7 +28,7 @@ struct TPerfomancer {
}
}
- int StreamLoad(bool log = true) override {
+ int StoreStream(bool log = true) override {
const size_t batch = 32 / TTraits::Size;
const size_t batch_size = TTraits::Size / 8;
size_t log_batch_size = 0;
@@ -41,8 +40,15 @@ struct TPerfomancer {
log_batch_size = 2;
}
- size_t size = (32LL << 21);
- i64* buf __attribute__((aligned(32))) = new i64[size];
+ const size_t size = (32LL << 21);
+ const size_t arrSize = size / 8;
+
+ i64* buf __attribute__((aligned(32))) = new i64[arrSize];
+
+ for (size_t i = 0; i < arrSize; i += 1) {
+ buf[i] = 0;
+ }
+
i64 tmp[4];
for (size_t i = 0; i < 4; i += 1) {
@@ -56,21 +62,18 @@ struct TPerfomancer {
std::chrono::steady_clock::time_point begin01 =
std::chrono::steady_clock::now();
- const size_t size_loop = size / 8;
-
- for (size_t i = 0; i < size_loop; i += 4) {
+ for (size_t i = 0; i < arrSize; i += 4) {
for (size_t j = 0; j < batch; j += 1) {
tmpSimd[j].StoreStream((i8*)(buf + i + j * batch_size));
}
}
-
std::chrono::steady_clock::time_point end01 =
std::chrono::steady_clock::now();
bool is_ok = true;
- for (size_t i = 0; i < size_loop; i += 1) {
+ for (size_t i = 0; i < arrSize; i += 1) {
if (buf[i] != i % 4) {
is_ok = false;
}
@@ -94,6 +97,14 @@ struct TPerfomancer {
return is_ok;
}
+ // Builds a heap-allocated 32-byte mask from eight 32-bit values: byte
+ // (4 * k + b) holds v[k] + b for k in [0, 8) and b in [0, 4).
+ // The caller owns the returned array and must release it with delete[].
+ ui8* ShuffleMask(ui32 v[8]) {
+     ui8* mask = new ui8[32];
+     for (size_t lane = 0; lane < 8; ++lane) {
+         for (size_t byte = 0; byte < 4; ++byte) {
+             mask[4 * lane + byte] = v[lane] + byte;
+         }
+     }
+     return mask;
+ }
+
~TWorker() = default;
};
@@ -106,5 +117,8 @@ struct TPerfomancer {
int main() {
TPerfomancer tp;
auto worker = NSimd::SelectSimdTraits(tp);
- return !worker->StreamLoad(false);
+
+ bool fine = true;
+ fine &= worker->StoreStream(true);
+ return !fine;
} \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/stream_store/ya.make b/ydb/library/yql/utils/simd/exec/stream_store/ya.make
index 631001cbdc..704a7b6c0a 100644
--- a/ydb/library/yql/utils/simd/exec/stream_store/ya.make
+++ b/ydb/library/yql/utils/simd/exec/stream_store/ya.make
@@ -2,4 +2,6 @@ PROGRAM()
SRCS(main.cpp)
+PEERDIR(ydb/library/yql/utils/simd)
+
END() \ No newline at end of file
diff --git a/ydb/library/yql/utils/simd/exec/ya.make b/ydb/library/yql/utils/simd/exec/ya.make
index 3fcf4d03e0..5d18536908 100644
--- a/ydb/library/yql/utils/simd/exec/ya.make
+++ b/ydb/library/yql/utils/simd/exec/ya.make
@@ -4,8 +4,13 @@ RUN(
stream_store
)
+RUN(
+ pack_tuple
+)
+
DEPENDS(
ydb/library/yql/utils/simd/exec/stream_store
+ ydb/library/yql/utils/simd/exec/pack_tuple
)
PEERDIR(
@@ -15,5 +20,6 @@ PEERDIR(
END()
RECURSE(
+ pack_tuple
stream_store
) \ No newline at end of file