intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <[email protected]> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <[email protected]> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/coding
8 files changed, 578 insertions, 0 deletions
diff --git a/library/cpp/yt/coding/unittests/varint_ut.cpp b/library/cpp/yt/coding/unittests/varint_ut.cpp
new file mode 100644
index 00000000000..ed83ab5c92b
--- /dev/null
+++ b/library/cpp/yt/coding/unittests/varint_ut.cpp
@@ -0,0 +1,134 @@
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <library/cpp/yt/coding/varint.h>
+
+#include <util/random/random.h>
+
+#include <util/string/escape.h>
+
+#include <tuple>
+
+namespace NYT {
+namespace {
+
+using ::testing::Values;
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TWriteVarIntTest: public ::testing::TestWithParam<std::tuple<ui64, TString> >
+{ };
+
+TEST_P(TWriteVarIntTest, Serialization)
+{
+    ui64 value = std::get<0>(GetParam());
+    TString rightAnswer = std::get<1>(GetParam());
+
+    TStringStream outputStream;
+    WriteVarUint64(&outputStream, value);
+    EXPECT_EQ(rightAnswer, outputStream.Str());
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+class TReadVarIntTest: public ::testing::TestWithParam<std::tuple<ui64, TString> >
+{ };
+
+TEST_P(TReadVarIntTest, Serialization)
+{
+    ui64 rightAnswer = std::get<0>(GetParam());
+    TString input = std::get<1>(GetParam());
+
+    TStringInput inputStream(input);
+    ui64 value;
+    ReadVarUint64(&inputStream, &value);
+    EXPECT_EQ(rightAnswer, value);
+}
+
+TEST(TReadVarIntTest, Overflow)
+{
+    TString input("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 11);
+    TStringInput inputStream(input);
+    ui64 value;
+    EXPECT_ANY_THROW(ReadVarUint64(&inputStream, &value));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+auto ValuesForVarIntTests = Values(
+    // Simple cases.
+    std::make_tuple(0x0ull,                TString("\x00", 1)),
+    std::make_tuple(0x1ull,                TString("\x01", 1)),
+    std::make_tuple(0x2ull,                TString("\x02", 1)),
+    std::make_tuple(0x3ull,                TString("\x03", 1)),
+    std::make_tuple(0x4ull,                TString("\x04", 1)),
+
+    // The following "magic numbers" are critical points for varint encoding.
+    std::make_tuple((1ull << 7) - 1,       TString("\x7f", 1)),
+    std::make_tuple((1ull << 7),           TString("\x80\x01", 2)),
+    std::make_tuple((1ull << 14) - 1,      TString("\xff\x7f", 2)),
+    std::make_tuple((1ull << 14),          TString("\x80\x80\x01", 3)),
+    std::make_tuple((1ull << 21) - 1,      TString("\xff\xff\x7f", 3)),
+    std::make_tuple((1ull << 21),          TString("\x80\x80\x80\x01", 4)),
+    std::make_tuple((1ull << 28) - 1,      TString("\xff\xff\xff\x7f", 4)),
+    std::make_tuple((1ull << 28),          TString("\x80\x80\x80\x80\x01", 5)),
+    std::make_tuple((1ull << 35) - 1,      TString("\xff\xff\xff\xff\x7f", 5)),
+    std::make_tuple((1ull << 35),          TString("\x80\x80\x80\x80\x80\x01", 6)),
+    std::make_tuple((1ull << 42) - 1,      TString("\xff\xff\xff\xff\xff\x7f", 6)),
+    std::make_tuple((1ull << 42),          TString("\x80\x80\x80\x80\x80\x80\x01", 7)),
+    std::make_tuple((1ull << 49) - 1,      TString("\xff\xff\xff\xff\xff\xff\x7f", 7)),
+    std::make_tuple((1ull << 49),          TString("\x80\x80\x80\x80\x80\x80\x80\x01", 8)),
+    std::make_tuple((1ull << 56) - 1,      TString("\xff\xff\xff\xff\xff\xff\xff\x7f", 8)),
+    std::make_tuple((1ull << 56),          TString("\x80\x80\x80\x80\x80\x80\x80\x80\x01", 9)),
+    std::make_tuple((1ull << 63) - 1,      TString("\xff\xff\xff\xff\xff\xff\xff\xff\x7f", 9)),
+    std::make_tuple((1ull << 63),          TString("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 10)),
+
+    // Boundary case.
+    std::make_tuple(static_cast<ui64>(-1), TString("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", 10))
+);
+
+INSTANTIATE_TEST_SUITE_P(ValueParametrized, TWriteVarIntTest,
+    ValuesForVarIntTests);
+
+INSTANTIATE_TEST_SUITE_P(ValueParametrized, TReadVarIntTest,
+    ValuesForVarIntTests);
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TVarInt32Test, RandomValues)
+{
+    srand(100500); // Set seed
+    const int numberOfValues = 10000;
+
+    TStringStream stream;
+    for (int i = 0; i < numberOfValues; ++i) {
+        i32 expected = static_cast<i32>(RandomNumber<ui32>());
+        WriteVarInt32(&stream, expected);
+        i32 actual;
+        ReadVarInt32(&stream, &actual);
+        EXPECT_EQ(expected, actual)
+            << "Encoded Variant: " << EscapeC(stream.Str());
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TVarInt64Test, RandomValues)
+{
+    srand(100500); // Set seed
+    const int numberOfValues = 10000;
+
+    TStringStream stream;
+    for (int i = 0; i < numberOfValues; ++i) {
+        i64 expected = static_cast<i64>(RandomNumber<ui64>());
+        WriteVarInt64(&stream, expected);
+        i64 actual;
+        ReadVarInt64(&stream, &actual);
+        EXPECT_EQ(expected, actual)
+            << "Encoded Variant: " << EscapeC(stream.Str());
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/library/cpp/yt/coding/unittests/ya.make b/library/cpp/yt/coding/unittests/ya.make
new file mode 100644
index 00000000000..e0622db22d2
--- /dev/null
+++ b/library/cpp/yt/coding/unittests/ya.make
@@ -0,0 +1,15 @@
+GTEST()
+
+OWNER(g:yt)
+
+SRCS(
+    zig_zag_ut.cpp
+    varint_ut.cpp
+)
+
+PEERDIR(
+    library/cpp/yt/coding
+    library/cpp/testing/gtest
+)
+
+END()
diff --git a/library/cpp/yt/coding/unittests/zig_zag_ut.cpp b/library/cpp/yt/coding/unittests/zig_zag_ut.cpp
new file mode 100644
index 00000000000..fae4e63064e
--- /dev/null
+++ b/library/cpp/yt/coding/unittests/zig_zag_ut.cpp
@@ -0,0 +1,57 @@
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <library/cpp/yt/coding/zig_zag.h>
+
+namespace NYT {
+namespace {
+
+////////////////////////////////////////////////////////////////////////////////
+
+TEST(TZigZagTest, Encode32)
+{
+    EXPECT_EQ(0u, ZigZagEncode32( 0));
+    EXPECT_EQ(1u, ZigZagEncode32(-1));
+    EXPECT_EQ(2u, ZigZagEncode32( 1));
+    EXPECT_EQ(3u, ZigZagEncode32(-2));
+    // ...
+    EXPECT_EQ(std::numeric_limits<ui32>::max() - 1, ZigZagEncode32(std::numeric_limits<i32>::max()));
+    EXPECT_EQ(std::numeric_limits<ui32>::max(),     ZigZagEncode32(std::numeric_limits<i32>::min()));
+}
+
+TEST(TZigZagTest, Decode32)
+{
+    EXPECT_EQ( 0, ZigZagDecode32(0));
+    EXPECT_EQ(-1, ZigZagDecode32(1));
+    EXPECT_EQ( 1, ZigZagDecode32(2));
+    EXPECT_EQ(-2, ZigZagDecode32(3));
+    // ...
+    EXPECT_EQ(std::numeric_limits<i32>::max(), ZigZagDecode32(std::numeric_limits<ui32>::max() - 1));
+    EXPECT_EQ(std::numeric_limits<i32>::min(), ZigZagDecode32(std::numeric_limits<ui32>::max()));
+}
+
+TEST(TZigZagTest, Encode64)
+{
+    EXPECT_EQ(0ull, ZigZagEncode64( 0));
+    EXPECT_EQ(1ull, ZigZagEncode64(-1));
+    EXPECT_EQ(2ull, ZigZagEncode64( 1));
+    EXPECT_EQ(3ull, ZigZagEncode64(-2));
+    // ...
+    EXPECT_EQ(std::numeric_limits<ui64>::max() - 1, ZigZagEncode64(std::numeric_limits<i64>::max()));
+    EXPECT_EQ(std::numeric_limits<ui64>::max(),     ZigZagEncode64(std::numeric_limits<i64>::min()));
+}
+
+TEST(TZigZagTest, Decode64)
+{
+    EXPECT_EQ(ZigZagDecode64(0),  0ll);
+    EXPECT_EQ(ZigZagDecode64(1), -1ll);
+    EXPECT_EQ(ZigZagDecode64(2),  1ll);
+    EXPECT_EQ(ZigZagDecode64(3), -2ll);
+    // ...
+    EXPECT_EQ(std::numeric_limits<i64>::max(), ZigZagDecode64(std::numeric_limits<ui64>::max() - 1));
+    EXPECT_EQ(std::numeric_limits<i64>::min(), ZigZagDecode64(std::numeric_limits<ui64>::max()));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace
+} // namespace NYT
diff --git a/library/cpp/yt/coding/varint-inl.h b/library/cpp/yt/coding/varint-inl.h
new file mode 100644
index 00000000000..f0a09e9d305
--- /dev/null
+++ b/library/cpp/yt/coding/varint-inl.h
@@ -0,0 +1,240 @@
+#ifndef VARINT_INL_H_
+#error "Direct inclusion of this file is not allowed, include varint.h"
+// For the sake of sane code completion.
+#include "varint.h"
+#endif
+
+#include "zig_zag.h"
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TWriteCallback>
+Y_FORCE_INLINE int WriteVarUint64Impl(TWriteCallback doWrite, ui64 value)
+{
+    bool stop = false;
+    int bytesWritten = 0;
+    while (!stop) {
+        ++bytesWritten;
+        ui8 byte = static_cast<ui8>(value | 0x80);
+        value >>= 7;
+        if (value == 0) {
+            stop = true;
+            byte &= 0x7F;
+        }
+        doWrite(byte);
+    }
+    return bytesWritten;
+}
+
+// These are optimized versions of these Read/Write functions in protobuf/io/coded_stream.cc.
+Y_FORCE_INLINE int WriteVarUint64(IOutputStream* output, ui64 value)
+{
+    return WriteVarUint64Impl([&] (ui8 byte) {
+        output->Write(byte);
+    }, value);
+}
+
+Y_FORCE_INLINE int WriteVarUint64(char* output, ui64 value)
+{
+    return WriteVarUint64Impl([&] (ui8 byte) {
+        *output++ = byte;
+    }, value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TOutput>
+Y_FORCE_INLINE int WriteVarUint32Impl(TOutput output, ui32 value)
+{
+    return WriteVarUint64(output, static_cast<ui64>(value));
+}
+
+Y_FORCE_INLINE int WriteVarUint32(IOutputStream* output, ui32 value)
+{
+    return WriteVarUint32Impl(output, value);
+}
+
+Y_FORCE_INLINE int WriteVarUint32(char* output, ui32 value)
+{
+    return WriteVarUint32Impl(output, value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TOutput>
+Y_FORCE_INLINE int WriteVarInt32Impl(TOutput output, i32 value)
+{
+    return WriteVarUint64(output, static_cast<ui64>(ZigZagEncode32(value)));
+}
+
+Y_FORCE_INLINE int WriteVarInt32(IOutputStream* output, i32 value)
+{
+    return WriteVarInt32Impl(output, value);
+}
+
+Y_FORCE_INLINE int WriteVarInt32(char* output, i32 value)
+{
+    return WriteVarInt32Impl(output, value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TOutput>
+Y_FORCE_INLINE int WriteVarInt64Impl(TOutput output, i64 value)
+{
+    return WriteVarUint64(output, static_cast<ui64>(ZigZagEncode64(value)));
+}
+
+Y_FORCE_INLINE int WriteVarInt64(IOutputStream* output, i64 value)
+{
+    return WriteVarInt64Impl(output, value);
+}
+
+Y_FORCE_INLINE int WriteVarInt64(char* output, i64 value)
+{
+    return WriteVarInt64Impl(output, value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class TReadCallback>
+Y_FORCE_INLINE int ReadVarUint64Impl(TReadCallback doRead, ui64* value)
+{
+    size_t count = 0;
+    ui64 result = 0;
+
+    ui8 byte;
+    do {
+        if (7 * count > 8 * sizeof(ui64) ) {
+            throw TSimpleException("Value is too big for varuint64");
+        }
+        byte = doRead();
+        result |= (static_cast<ui64> (byte & 0x7F)) << (7 * count);
+        ++count;
+    } while (byte & 0x80);
+
+    *value = result;
+    return count;
+}
+
+Y_FORCE_INLINE int ReadVarUint64(IInputStream* input, ui64* value)
+{
+    return ReadVarUint64Impl([&] () {
+        char byte;
+        if (input->Read(&byte, 1) != 1) {
+            throw TSimpleException("Premature end of stream while reading varuint64");
+        }
+        return byte;
+    }, value);
+}
+
+Y_FORCE_INLINE int ReadVarUint64(const char* input, ui64* value)
+{
+    return ReadVarUint64Impl([&] () {
+        char byte = *input;
+        ++input;
+        return byte;
+    }, value);
+}
+
+Y_FORCE_INLINE int ReadVarUint64(const char* input, const char* end, ui64* value)
+{
+    return ReadVarUint64Impl([&] () {
+        if (input == end) {
+            throw TSimpleException("Premature end of data while reading varuint64");
+        }
+        char byte = *input;
+        ++input;
+        return byte;
+    }, value);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class... Args>
+Y_FORCE_INLINE int ReadVarUint32Impl(ui32* value, Args... args)
+{
+    ui64 varInt;
+    int bytesRead = ReadVarUint64(args..., &varInt);
+    if (varInt > std::numeric_limits<ui32>::max()) {
+        throw TSimpleException("Value is too big for varuint32");
+    }
+    *value = static_cast<ui32>(varInt);
+    return bytesRead;
+}
+
+Y_FORCE_INLINE int ReadVarUint32(IInputStream* input, ui32* value)
+{
+    return ReadVarUint32Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarUint32(const char* input, ui32* value)
+{
+    return ReadVarUint32Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarUint32(const char* input, const char* end, ui32* value)
+{
+    return ReadVarUint32Impl(value, input, end);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class... Args>
+Y_FORCE_INLINE int ReadVarInt32Impl(i32* value, Args... args)
+{
+    ui64 varInt;
+    int bytesRead = ReadVarUint64(args..., &varInt);
+    if (varInt > std::numeric_limits<ui32>::max()) {
+        throw TSimpleException("Value is too big for varint32");
+    }
+    *value = ZigZagDecode32(static_cast<ui32>(varInt));
+    return bytesRead;
+}
+
+Y_FORCE_INLINE int ReadVarInt32(IInputStream* input, i32* value)
+{
+    return ReadVarInt32Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarInt32(const char* input, i32* value)
+{
+    return ReadVarInt32Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarInt32(const char* input, const char* end, i32* value)
+{
+    return ReadVarInt32Impl(value, input, end);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <class... Args>
+Y_FORCE_INLINE int ReadVarInt64Impl(i64* value, Args... args)
+{
+    ui64 varInt;
+    int bytesRead = ReadVarUint64(args..., &varInt);
+    *value = ZigZagDecode64(varInt);
+    return bytesRead;
+}
+
+Y_FORCE_INLINE int ReadVarInt64(IInputStream* input, i64* value)
+{
+    return ReadVarInt64Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarInt64(const char* input, i64* value)
+{
+    return ReadVarInt64Impl(value, input);
+}
+
+Y_FORCE_INLINE int ReadVarInt64(const char* input, const char* end, i64* value)
+{
+    return ReadVarInt64Impl(value, input, end);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yt/coding/varint.h b/library/cpp/yt/coding/varint.h
new file mode 100644
index 00000000000..c5399f8b069
--- /dev/null
+++ b/library/cpp/yt/coding/varint.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <library/cpp/yt/exception/exception.h>
+
+#include <util/system/defaults.h>
+
+#include <util/stream/input.h>
+#include <util/stream/output.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+constexpr size_t MaxVarInt64Size = (8 * sizeof(ui64) - 1) / 7 + 1;
+constexpr size_t MaxVarUint64Size = (8 * sizeof(ui64) - 1) / 7 + 1;
+
+constexpr size_t MaxVarInt32Size = (8 * sizeof(ui32) - 1) / 7 + 1;
+constexpr size_t MaxVarUint32Size = (8 * sizeof(ui32) - 1) / 7 + 1;
+
+// Various functions to read/write varints.
+
+// Returns the number of bytes written.
+int WriteVarUint64(IOutputStream* output, ui64 value);
+int WriteVarUint32(IOutputStream* output, ui32 value);
+int WriteVarInt32(IOutputStream* output, i32 value);
+int WriteVarInt64(IOutputStream* output, i64 value);
+
+int WriteVarUint64(char* output, ui64 value);
+int WriteVarUint32(char* output, ui32 value);
+int WriteVarInt32(char* output, i32 value);
+int WriteVarInt64(char* output, i64 value);
+
+// Returns the number of bytes read.
+int ReadVarUint64(IInputStream* input, ui64* value);
+int ReadVarUint32(IInputStream* input, ui32* value);
+int ReadVarInt32(IInputStream* input, i32* value);
+int ReadVarInt64(IInputStream* input, i64* value);
+
+int ReadVarUint64(const char* input, ui64* value);
+int ReadVarUint32(const char* input, ui32* value);
+int ReadVarInt32(const char* input, i32* value);
+int ReadVarInt64(const char* input, i64* value);
+
+// Throws exception if integer is not complete when `end' is reached.
+int ReadVarUint64(const char* input, const char* end, ui64* value);
+int ReadVarUint32(const char* input, const char* end, ui32* value);
+int ReadVarInt32(const char* input, const char* end, i32* value);
+int ReadVarInt64(const char* input, const char* end, i64* value);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define VARINT_INL_H_
+#include "varint-inl.h"
+#undef VARINT_INL_H_
diff --git a/library/cpp/yt/coding/ya.make b/library/cpp/yt/coding/ya.make
new file mode 100644
index 00000000000..3dae919e57f
--- /dev/null
+++ b/library/cpp/yt/coding/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+SRCS(
+)
+
+PEERDIR(
+    library/cpp/yt/exception
+)
+
+END()
+
+RECURSE_FOR_TESTS(unittests)
diff --git a/library/cpp/yt/coding/zig_zag-inl.h b/library/cpp/yt/coding/zig_zag-inl.h
new file mode 100644
index 00000000000..c611f7e1d46
--- /dev/null
+++ b/library/cpp/yt/coding/zig_zag-inl.h
@@ -0,0 +1,40 @@
+#ifndef ZIG_ZAG_INL_H_
+#error "Direct inclusion of this file is not allowed, include zig_zag.h"
+// For the sake of sane code completion.
+#include "zig_zag.h"
+#endif
+#undef ZIG_ZAG_INL_H_
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+inline ui32 ZigZagEncode32(i32 n)
+{
+    // Note:  the right-shift must be arithmetic.
+    // Note:  left shift must be unsigned because of overflow.
+    return (static_cast<ui32>(n) << 1) ^ static_cast<ui32>(n >> 31);
+}
+
+inline i32 ZigZagDecode32(ui32 n)
+{
+    // Note:  using unsigned types prevent undefined behavior.
+    return static_cast<i32>((n >> 1) ^ (~(n & 1) + 1));
+}
+
+inline ui64 ZigZagEncode64(i64 n)
+{
+    // Note:  the right-shift must be arithmetic.
+    // Note:  left shift must be unsigned because of overflow.
+    return (static_cast<ui64>(n) << 1) ^ static_cast<ui64>(n >> 63);
+}
+
+inline i64 ZigZagDecode64(ui64 n)
+{
+    // Note:  using unsigned types prevent undefined behavior.
+    return static_cast<i64>((n >> 1) ^ (~(n & 1) + 1));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
diff --git a/library/cpp/yt/coding/zig_zag.h b/library/cpp/yt/coding/zig_zag.h
new file mode 100644
index 00000000000..aa6d425a1c0
--- /dev/null
+++ b/library/cpp/yt/coding/zig_zag.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include <util/system/types.h>
+
+namespace NYT {
+
+////////////////////////////////////////////////////////////////////////////////
+
+// These Functions provide coding of integers with property: 0 <= f(x) <= 2 * |x|
+// Actually taken 'as is' from protobuf/wire_format_lite.h
+
+ui32 ZigZagEncode32(i32 n);
+i32 ZigZagDecode32(ui32 n);
+
+ui64 ZigZagEncode64(i64 n);
+i64 ZigZagDecode64(ui64 n);
+
+////////////////////////////////////////////////////////////////////////////////
+
+} // namespace NYT
+
+#define ZIG_ZAG_INL_H_
+#include "zig_zag-inl.h"
+#undef ZIG_ZAG_INL_H_
author	Devtools Arcadia <[email protected]>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <[email protected]>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/coding