aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorOleg Doronin <fortan57@gmail.com>2022-06-24 17:17:02 +0300
committerOleg Doronin <fortan57@gmail.com>2022-06-24 17:17:02 +0300
commit4ef1d84f3f2f798b9c95129e716afd50cdb46234 (patch)
treebb8cb74b3cafa2818d00cb75365ba9a02e7b6fd7
parent45ad9f6d8690a224820723ce2fe7cad1110c35f4 (diff)
downloadydb-4ef1d84f3f2f798b9c95129e716afd50cdb46234.tar.gz
dict/dictutil deps has been deprecated for ydb YQ-1188
ref:f30a2e6b78c8b333542402dcdb34195d5fe049da
-rw-r--r--ydb/library/yql/minikql/benchmark/pack_num/pack.cpp286
-rw-r--r--ydb/library/yql/minikql/benchmark/pack_num/pack.h29
-rw-r--r--ydb/library/yql/minikql/benchmark/pack_num/pack_num_bench.cpp4
3 files changed, 317 insertions, 2 deletions
diff --git a/ydb/library/yql/minikql/benchmark/pack_num/pack.cpp b/ydb/library/yql/minikql/benchmark/pack_num/pack.cpp
new file mode 100644
index 0000000000..86af3a94f7
--- /dev/null
+++ b/ydb/library/yql/minikql/benchmark/pack_num/pack.cpp
@@ -0,0 +1,286 @@
+#include "pack.h"
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Pack
+//
+
+int Pack(ui8* buf, ui32 value) {
+ ui8* p = buf;
+ for (;;) {
+ ui8 v = (ui8)value & 0x7f;
+ value >>= 7;
+ if (!value) {
+ *p++ = v;
+ break;
+ }
+ *p++ = (v | 0x80);
+ }
+ return (int)(p - buf);
+}
+
+ui32 Unpack(const ui8*& data) {
+ ui32 value = 0;
+ const ui8* p = data;
+ for (int off = 0;; ++p, off += 7) {
+ value |= (*p & 0x7f) << off;
+ if ((*p & 0x80) == 0) {
+ ++p;
+ break;
+ }
+ }
+ data = p;
+ return value;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// PackU32
+//
+
+int PackU32(ui32 value, void* buf) {
+ ui8* p = (ui8*)buf; // 654 3210 <-- bit of 'value' (7 bits total)
+ if (value < (1u << 7)) { // [0--- ----]
+ p[0] = (ui8)value; // ^ ^
+ return 1; // MSB LSB
+ }
+ value -= (1u << 7); // DC BA98 7654 3210 <-- bit of 'value' (14 == 0xD bits total)
+ if (value < (1u << 14)) { // [10-- ----] [---- ----]
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0x80 | (ui8)value;
+ return 2;
+ }
+ value -= (1u << 14); // And so on...
+ if (value < (1u << 21)) { // [110- ----] [---- ----] [---- ----]
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xc0 | (ui8)value;
+ return 3;
+ }
+ value -= (1u << 21);
+ if (value < (1u << 28)) { // [1110 ----] [---- ----] [---- ----] [---- ----]
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xe0 | (ui8)value;
+ return 4;
+ } // ...but for largest numbers, whole first byte is reserved.
+ value -= (1u << 28); // [1111 0000] [---- ----] [---- ----] [---- ----] [---- ----]
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value;
+ p[0] = 0xf0;
+ return 5;
+}
+
+int UnpackU32(ui32* value, const void* buf) {
+ const ui8* p = (const ui8*)buf;
+ ui8 b = p[0];
+ if (!(b & 0x80)) {
+ *value = b;
+ return 1;
+ }
+ if (!(b & 0x40)) {
+ *value = 0x80 + (((b & 0x3f) << 8) | p[1]);
+ return 2;
+ }
+ if (!(b & 0x20)) {
+ *value = 0x4080 + (((b & 0x1f) << 16) | (p[1] << 8) | p[2]);
+ return 3;
+ }
+ if (!(b & 0x10)) {
+ *value = 0x204080 + (((b & 0x0f) << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
+ return 4;
+ }
+ *value = 0x10204080 + ((p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4]);
+ return 5;
+}
+
+int SkipU32(const void* buf) {
+ static const i8 skip[16] = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 5 };
+ int i = ((const ui8*)buf)[0] >> 4;
+ return skip[i];
+}
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// PackU64
+//
+
+int PackU64(ui64 value, void* buf) { // Packing scheme is analogous to PackU32()
+ ui8* p = (ui8*)buf;
+ if (value < (1u << 7)) {
+ p[0] = (ui8)value;
+ return 1;
+ }
+ value -= (1u << 7);
+ if (value < (1u << 14)) {
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0x80 | (ui8)value;
+ return 2;
+ }
+ value -= (1u << 14);
+ if (value < (1u << 21)) {
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xc0 | (ui8)value;
+ return 3;
+ }
+ value -= (1u << 21);
+ if (value < (1u << 28)) {
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xe0 | (ui8)value;
+ return 4;
+ }
+ value -= (1u << 28);
+ if (value < (ui64(1) << 35)) {
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xf0 | (ui8)value;
+ return 5;
+ }
+ value -= ui64(1) << 35;
+ if (value < (ui64(1) << 42)) {
+ p[5] = (ui8)value; value >>= 8;
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xf8 | (ui8)value;
+ return 6;
+ }
+ value -= ui64(1) << 42;
+ if (value < (ui64(1) << 49)) { // [1111 110-] [---- ----] ... (+ 5 bytes)
+ p[6] = (ui8)value; value >>= 8;
+ p[5] = (ui8)value; value >>= 8;
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xfc | (ui8)value;
+ return 7;
+ }
+ value -= ui64(1) << 49;
+ if (value < (ui64(1) << 56)) { // [1111 1110] [---- ----] ... (+ 6 bytes)
+ p[7] = (ui8)value; value >>= 8;
+ p[6] = (ui8)value; value >>= 8;
+ p[5] = (ui8)value; value >>= 8;
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xfe | (ui8)value;
+ return 8;
+ }
+ value -= ui64(1) << 56; // [1111 1111] [---- ----] ... (+ 7 bytes)
+ p[8] = (ui8)value; value >>= 8; // ^
+ p[7] = (ui8)value; value >>= 8; // not a zero; contains actual bit
+ p[6] = (ui8)value; value >>= 8;
+ p[5] = (ui8)value; value >>= 8;
+ p[4] = (ui8)value; value >>= 8;
+ p[3] = (ui8)value; value >>= 8;
+ p[2] = (ui8)value; value >>= 8;
+ p[1] = (ui8)value; value >>= 8;
+ p[0] = 0xff;
+ return 9;
+}
+
+int UnpackU64(ui64* value, const void* buf) {
+ const ui8* p = (const ui8*)buf;
+ ui8 b = p[0];
+ if (!(b & 0x80)) {
+ *value = b;
+ return 1;
+ }
+ if (!(b & 0x40)) {
+ *value = 0x80 + (((b & 0x3f) << 8) | p[1]);
+ return 2;
+ }
+ if (!(b & 0x20)) {
+ *value = 0x4080 + (((b & 0x1f) << 16) | (p[1] << 8) | p[2]);
+ return 3;
+ }
+ if (!(b & 0x10)) {
+ *value = 0x204080 + (((b & 0x0f) << 24) | (p[1] << 16) | (p[2] << 8) | p[3]);
+ return 4;
+ }
+ if (!(b & 0x08)) {
+ *value = 0x10204080 + (
+ (ui64(b & 0x07) << 32) |
+ (ui32(p[1]) << 24) |
+ ( p[2] << 16) |
+ ( p[3] << 8) |
+ p[4]
+ );
+ return 5;
+ }
+ if (!(b & 0x04)) {
+ *value = 0x0810204080ull + (
+ (ui64(b & 0x03) << 40) |
+ (ui64(p[1]) << 32) |
+ (ui32(p[2]) << 24) |
+ ( p[3] << 16) |
+ ( p[4] << 8) |
+ p[5]
+ );
+ return 6;
+ }
+ if (!(b & 0x02)) {
+ *value = 0x040810204080ull + (
+ (ui64(b & 0x01) << 48) |
+ (ui64(p[1]) << 40) |
+ (ui64(p[2]) << 32) |
+ (ui32(p[3]) << 24) |
+ ( p[4] << 16) |
+ ( p[5] << 8) |
+ p[6]
+ );
+ return 7;
+ }
+ if (!(b & 0x01)) {
+ *value = 0x02040810204080ull + (
+ (ui64(p[1]) << 48) |
+ (ui64(p[2]) << 40) |
+ (ui64(p[3]) << 32) |
+ (ui32(p[4]) << 24) |
+ ( p[5] << 16) |
+ ( p[6] << 8) |
+ p[7]
+ );
+ return 8;
+ }
+ *value = 0x0102040810204080ull + (
+ (ui64(p[1]) << 56) |
+ (ui64(p[2]) << 48) |
+ (ui64(p[3]) << 40) |
+ (ui64(p[4]) << 32) |
+ (ui32(p[5]) << 24) |
+ ( p[6] << 16) |
+ ( p[7] << 8) |
+ p[8]
+ );
+ return 9;
+}
+
+#define REPEAT_16(x) x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x
+#define REPEAT_32(x) REPEAT_16(x), REPEAT_16(x)
+#define REPEAT_64(x) REPEAT_32(x), REPEAT_32(x)
+#define REPEAT_128(x) REPEAT_64(x), REPEAT_64(x)
+
+int SkipU64(const void* buf) {
+ static const i8 skip[256] = {
+ REPEAT_128(1), REPEAT_64(2), REPEAT_32(3), REPEAT_16(4),
+ 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 9
+ };
+ return skip[*(const ui8*)buf];
+}
+
+#undef REPEAT_16
+#undef REPEAT_32
+#undef REPEAT_64
+#undef REPEAT_128
diff --git a/ydb/library/yql/minikql/benchmark/pack_num/pack.h b/ydb/library/yql/minikql/benchmark/pack_num/pack.h
new file mode 100644
index 0000000000..c4dc15fdbf
--- /dev/null
+++ b/ydb/library/yql/minikql/benchmark/pack_num/pack.h
@@ -0,0 +1,29 @@
+#pragma once
+
+#include <util/system/types.h>
+
+const int MAX_BYTES_FOR_UI32 = 5; // max count of bytes to Pack(ui32) or PackU32()
+const int MAX_BYTES_FOR_UI64 = 9; // max count of bytes to PackU64()
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// Pack functions
+//
+
+int Pack(ui8* buf, ui32 value);
+ui32 Unpack(const ui8*& data);
+
+////////////////////////////////////////////////////////////////////////////////
+//
+// PackU32:
+// 1. Preserves sort order
+// 2. Slightly more compact than 'Pack'
+//
+
+int PackU32(ui32 value, void* buf);
+int UnpackU32(ui32* value, const void* buf);
+int SkipU32(const void* buf);
+
+int PackU64(ui64 value, void* buf);
+int UnpackU64(ui64* value, const void* buf);
+int SkipU64(const void* buf);
diff --git a/ydb/library/yql/minikql/benchmark/pack_num/pack_num_bench.cpp b/ydb/library/yql/minikql/benchmark/pack_num/pack_num_bench.cpp
index d49aa682d0..31b86ffcb5 100644
--- a/ydb/library/yql/minikql/benchmark/pack_num/pack_num_bench.cpp
+++ b/ydb/library/yql/minikql/benchmark/pack_num/pack_num_bench.cpp
@@ -1,10 +1,10 @@
+#include "pack.h"
+
#include <ydb/library/yql/minikql/pack_num.h>
#include <library/cpp/testing/benchmark/bench.h>
#include <library/cpp/packedtypes/longs.h>
-#include <dict/dictutil/pack.h>
-
#include <util/generic/xrange.h>
#include <util/generic/singleton.h>
#include <util/random/random.h>