diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/coding | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yt/coding')
-rw-r--r-- | library/cpp/yt/coding/unittests/varint_ut.cpp | 134 | ||||
-rw-r--r-- | library/cpp/yt/coding/unittests/ya.make | 15 | ||||
-rw-r--r-- | library/cpp/yt/coding/unittests/zig_zag_ut.cpp | 57 | ||||
-rw-r--r-- | library/cpp/yt/coding/varint-inl.h | 240 | ||||
-rw-r--r-- | library/cpp/yt/coding/varint.h | 56 | ||||
-rw-r--r-- | library/cpp/yt/coding/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/yt/coding/zig_zag-inl.h | 40 | ||||
-rw-r--r-- | library/cpp/yt/coding/zig_zag.h | 24 |
8 files changed, 578 insertions, 0 deletions
diff --git a/library/cpp/yt/coding/unittests/varint_ut.cpp b/library/cpp/yt/coding/unittests/varint_ut.cpp new file mode 100644 index 00000000000..ed83ab5c92b --- /dev/null +++ b/library/cpp/yt/coding/unittests/varint_ut.cpp @@ -0,0 +1,134 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/coding/varint.h> + +#include <util/random/random.h> + +#include <util/string/escape.h> + +#include <tuple> + +namespace NYT { +namespace { + +using ::testing::Values; + +//////////////////////////////////////////////////////////////////////////////// + +class TWriteVarIntTest: public ::testing::TestWithParam<std::tuple<ui64, TString> > +{ }; + +TEST_P(TWriteVarIntTest, Serialization) +{ + ui64 value = std::get<0>(GetParam()); + TString rightAnswer = std::get<1>(GetParam()); + + TStringStream outputStream; + WriteVarUint64(&outputStream, value); + EXPECT_EQ(rightAnswer, outputStream.Str()); +} + +//////////////////////////////////////////////////////////////////////////////// + +class TReadVarIntTest: public ::testing::TestWithParam<std::tuple<ui64, TString> > +{ }; + +TEST_P(TReadVarIntTest, Serialization) +{ + ui64 rightAnswer = std::get<0>(GetParam()); + TString input = std::get<1>(GetParam()); + + TStringInput inputStream(input); + ui64 value; + ReadVarUint64(&inputStream, &value); + EXPECT_EQ(rightAnswer, value); +} + +TEST(TReadVarIntTest, Overflow) +{ + TString input("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 11); + TStringInput inputStream(input); + ui64 value; + EXPECT_ANY_THROW(ReadVarUint64(&inputStream, &value)); +} + +//////////////////////////////////////////////////////////////////////////////// + +auto ValuesForVarIntTests = Values( + // Simple cases. + std::make_tuple(0x0ull, TString("\x00", 1)), + std::make_tuple(0x1ull, TString("\x01", 1)), + std::make_tuple(0x2ull, TString("\x02", 1)), + std::make_tuple(0x3ull, TString("\x03", 1)), + std::make_tuple(0x4ull, TString("\x04", 1)), + + // The following "magic numbers" are critical points for varint encoding. + std::make_tuple((1ull << 7) - 1, TString("\x7f", 1)), + std::make_tuple((1ull << 7), TString("\x80\x01", 2)), + std::make_tuple((1ull << 14) - 1, TString("\xff\x7f", 2)), + std::make_tuple((1ull << 14), TString("\x80\x80\x01", 3)), + std::make_tuple((1ull << 21) - 1, TString("\xff\xff\x7f", 3)), + std::make_tuple((1ull << 21), TString("\x80\x80\x80\x01", 4)), + std::make_tuple((1ull << 28) - 1, TString("\xff\xff\xff\x7f", 4)), + std::make_tuple((1ull << 28), TString("\x80\x80\x80\x80\x01", 5)), + std::make_tuple((1ull << 35) - 1, TString("\xff\xff\xff\xff\x7f", 5)), + std::make_tuple((1ull << 35), TString("\x80\x80\x80\x80\x80\x01", 6)), + std::make_tuple((1ull << 42) - 1, TString("\xff\xff\xff\xff\xff\x7f", 6)), + std::make_tuple((1ull << 42), TString("\x80\x80\x80\x80\x80\x80\x01", 7)), + std::make_tuple((1ull << 49) - 1, TString("\xff\xff\xff\xff\xff\xff\x7f", 7)), + std::make_tuple((1ull << 49), TString("\x80\x80\x80\x80\x80\x80\x80\x01", 8)), + std::make_tuple((1ull << 56) - 1, TString("\xff\xff\xff\xff\xff\xff\xff\x7f", 8)), + std::make_tuple((1ull << 56), TString("\x80\x80\x80\x80\x80\x80\x80\x80\x01", 9)), + std::make_tuple((1ull << 63) - 1, TString("\xff\xff\xff\xff\xff\xff\xff\xff\x7f", 9)), + std::make_tuple((1ull << 63), TString("\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01", 10)), + + // Boundary case. + std::make_tuple(static_cast<ui64>(-1), TString("\xff\xff\xff\xff\xff\xff\xff\xff\xff\x01", 10)) +); + +INSTANTIATE_TEST_SUITE_P(ValueParametrized, TWriteVarIntTest, + ValuesForVarIntTests); + +INSTANTIATE_TEST_SUITE_P(ValueParametrized, TReadVarIntTest, + ValuesForVarIntTests); + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TVarInt32Test, RandomValues) +{ + srand(100500); // Set seed + const int numberOfValues = 10000; + + TStringStream stream; + for (int i = 0; i < numberOfValues; ++i) { + i32 expected = static_cast<i32>(RandomNumber<ui32>()); + WriteVarInt32(&stream, expected); + i32 actual; + ReadVarInt32(&stream, &actual); + EXPECT_EQ(expected, actual) + << "Encoded Variant: " << EscapeC(stream.Str()); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TVarInt64Test, RandomValues) +{ + srand(100500); // Set seed + const int numberOfValues = 10000; + + TStringStream stream; + for (int i = 0; i < numberOfValues; ++i) { + i64 expected = static_cast<i64>(RandomNumber<ui64>()); + WriteVarInt64(&stream, expected); + i64 actual; + ReadVarInt64(&stream, &actual); + EXPECT_EQ(expected, actual) + << "Encoded Variant: " << EscapeC(stream.Str()); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/coding/unittests/ya.make b/library/cpp/yt/coding/unittests/ya.make new file mode 100644 index 00000000000..e0622db22d2 --- /dev/null +++ b/library/cpp/yt/coding/unittests/ya.make @@ -0,0 +1,15 @@ +GTEST() + +OWNER(g:yt) + +SRCS( + zig_zag_ut.cpp + varint_ut.cpp +) + +PEERDIR( + library/cpp/yt/coding + library/cpp/testing/gtest +) + +END() diff --git a/library/cpp/yt/coding/unittests/zig_zag_ut.cpp b/library/cpp/yt/coding/unittests/zig_zag_ut.cpp new file mode 100644 index 00000000000..fae4e63064e --- /dev/null +++ b/library/cpp/yt/coding/unittests/zig_zag_ut.cpp @@ -0,0 +1,57 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/coding/zig_zag.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TZigZagTest, Encode32) +{ + EXPECT_EQ(0u, ZigZagEncode32( 0)); + EXPECT_EQ(1u, ZigZagEncode32(-1)); + EXPECT_EQ(2u, ZigZagEncode32( 1)); + EXPECT_EQ(3u, ZigZagEncode32(-2)); + // ... + EXPECT_EQ(std::numeric_limits<ui32>::max() - 1, ZigZagEncode32(std::numeric_limits<i32>::max())); + EXPECT_EQ(std::numeric_limits<ui32>::max(), ZigZagEncode32(std::numeric_limits<i32>::min())); +} + +TEST(TZigZagTest, Decode32) +{ + EXPECT_EQ( 0, ZigZagDecode32(0)); + EXPECT_EQ(-1, ZigZagDecode32(1)); + EXPECT_EQ( 1, ZigZagDecode32(2)); + EXPECT_EQ(-2, ZigZagDecode32(3)); + // ... + EXPECT_EQ(std::numeric_limits<i32>::max(), ZigZagDecode32(std::numeric_limits<ui32>::max() - 1)); + EXPECT_EQ(std::numeric_limits<i32>::min(), ZigZagDecode32(std::numeric_limits<ui32>::max())); +} + +TEST(TZigZagTest, Encode64) +{ + EXPECT_EQ(0ull, ZigZagEncode64( 0)); + EXPECT_EQ(1ull, ZigZagEncode64(-1)); + EXPECT_EQ(2ull, ZigZagEncode64( 1)); + EXPECT_EQ(3ull, ZigZagEncode64(-2)); + // ... + EXPECT_EQ(std::numeric_limits<ui64>::max() - 1, ZigZagEncode64(std::numeric_limits<i64>::max())); + EXPECT_EQ(std::numeric_limits<ui64>::max(), ZigZagEncode64(std::numeric_limits<i64>::min())); +} + +TEST(TZigZagTest, Decode64) +{ + EXPECT_EQ(ZigZagDecode64(0), 0ll); + EXPECT_EQ(ZigZagDecode64(1), -1ll); + EXPECT_EQ(ZigZagDecode64(2), 1ll); + EXPECT_EQ(ZigZagDecode64(3), -2ll); + // ... + EXPECT_EQ(std::numeric_limits<i64>::max(), ZigZagDecode64(std::numeric_limits<ui64>::max() - 1)); + EXPECT_EQ(std::numeric_limits<i64>::min(), ZigZagDecode64(std::numeric_limits<ui64>::max())); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/coding/varint-inl.h b/library/cpp/yt/coding/varint-inl.h new file mode 100644 index 00000000000..f0a09e9d305 --- /dev/null +++ b/library/cpp/yt/coding/varint-inl.h @@ -0,0 +1,240 @@ +#ifndef VARINT_INL_H_ +#error "Direct inclusion of this file is not allowed, include varint.h" +// For the sake of sane code completion. +#include "varint.h" +#endif + +#include "zig_zag.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +template <class TWriteCallback> +Y_FORCE_INLINE int WriteVarUint64Impl(TWriteCallback doWrite, ui64 value) +{ + bool stop = false; + int bytesWritten = 0; + while (!stop) { + ++bytesWritten; + ui8 byte = static_cast<ui8>(value | 0x80); + value >>= 7; + if (value == 0) { + stop = true; + byte &= 0x7F; + } + doWrite(byte); + } + return bytesWritten; +} + +// These are optimized versions of these Read/Write functions in protobuf/io/coded_stream.cc. +Y_FORCE_INLINE int WriteVarUint64(IOutputStream* output, ui64 value) +{ + return WriteVarUint64Impl([&] (ui8 byte) { + output->Write(byte); + }, value); +} + +Y_FORCE_INLINE int WriteVarUint64(char* output, ui64 value) +{ + return WriteVarUint64Impl([&] (ui8 byte) { + *output++ = byte; + }, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TOutput> +Y_FORCE_INLINE int WriteVarUint32Impl(TOutput output, ui32 value) +{ + return WriteVarUint64(output, static_cast<ui64>(value)); +} + +Y_FORCE_INLINE int WriteVarUint32(IOutputStream* output, ui32 value) +{ + return WriteVarUint32Impl(output, value); +} + +Y_FORCE_INLINE int WriteVarUint32(char* output, ui32 value) +{ + return WriteVarUint32Impl(output, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TOutput> +Y_FORCE_INLINE int WriteVarInt32Impl(TOutput output, i32 value) +{ + return WriteVarUint64(output, static_cast<ui64>(ZigZagEncode32(value))); +} + +Y_FORCE_INLINE int WriteVarInt32(IOutputStream* output, i32 value) +{ + return WriteVarInt32Impl(output, value); +} + +Y_FORCE_INLINE int WriteVarInt32(char* output, i32 value) +{ + return WriteVarInt32Impl(output, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TOutput> +Y_FORCE_INLINE int WriteVarInt64Impl(TOutput output, i64 value) +{ + return WriteVarUint64(output, static_cast<ui64>(ZigZagEncode64(value))); +} + +Y_FORCE_INLINE int WriteVarInt64(IOutputStream* output, i64 value) +{ + return WriteVarInt64Impl(output, value); +} + +Y_FORCE_INLINE int WriteVarInt64(char* output, i64 value) +{ + return WriteVarInt64Impl(output, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TReadCallback> +Y_FORCE_INLINE int ReadVarUint64Impl(TReadCallback doRead, ui64* value) +{ + size_t count = 0; + ui64 result = 0; + + ui8 byte; + do { + if (7 * count > 8 * sizeof(ui64) ) { + throw TSimpleException("Value is too big for varuint64"); + } + byte = doRead(); + result |= (static_cast<ui64> (byte & 0x7F)) << (7 * count); + ++count; + } while (byte & 0x80); + + *value = result; + return count; +} + +Y_FORCE_INLINE int ReadVarUint64(IInputStream* input, ui64* value) +{ + return ReadVarUint64Impl([&] () { + char byte; + if (input->Read(&byte, 1) != 1) { + throw TSimpleException("Premature end of stream while reading varuint64"); + } + return byte; + }, value); +} + +Y_FORCE_INLINE int ReadVarUint64(const char* input, ui64* value) +{ + return ReadVarUint64Impl([&] () { + char byte = *input; + ++input; + return byte; + }, value); +} + +Y_FORCE_INLINE int ReadVarUint64(const char* input, const char* end, ui64* value) +{ + return ReadVarUint64Impl([&] () { + if (input == end) { + throw TSimpleException("Premature end of data while reading varuint64"); + } + char byte = *input; + ++input; + return byte; + }, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class... Args> +Y_FORCE_INLINE int ReadVarUint32Impl(ui32* value, Args... args) +{ + ui64 varInt; + int bytesRead = ReadVarUint64(args..., &varInt); + if (varInt > std::numeric_limits<ui32>::max()) { + throw TSimpleException("Value is too big for varuint32"); + } + *value = static_cast<ui32>(varInt); + return bytesRead; +} + +Y_FORCE_INLINE int ReadVarUint32(IInputStream* input, ui32* value) +{ + return ReadVarUint32Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarUint32(const char* input, ui32* value) +{ + return ReadVarUint32Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarUint32(const char* input, const char* end, ui32* value) +{ + return ReadVarUint32Impl(value, input, end); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class... Args> +Y_FORCE_INLINE int ReadVarInt32Impl(i32* value, Args... args) +{ + ui64 varInt; + int bytesRead = ReadVarUint64(args..., &varInt); + if (varInt > std::numeric_limits<ui32>::max()) { + throw TSimpleException("Value is too big for varint32"); + } + *value = ZigZagDecode32(static_cast<ui32>(varInt)); + return bytesRead; +} + +Y_FORCE_INLINE int ReadVarInt32(IInputStream* input, i32* value) +{ + return ReadVarInt32Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarInt32(const char* input, i32* value) +{ + return ReadVarInt32Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarInt32(const char* input, const char* end, i32* value) +{ + return ReadVarInt32Impl(value, input, end); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class... Args> +Y_FORCE_INLINE int ReadVarInt64Impl(i64* value, Args... args) +{ + ui64 varInt; + int bytesRead = ReadVarUint64(args..., &varInt); + *value = ZigZagDecode64(varInt); + return bytesRead; +} + +Y_FORCE_INLINE int ReadVarInt64(IInputStream* input, i64* value) +{ + return ReadVarInt64Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarInt64(const char* input, i64* value) +{ + return ReadVarInt64Impl(value, input); +} + +Y_FORCE_INLINE int ReadVarInt64(const char* input, const char* end, i64* value) +{ + return ReadVarInt64Impl(value, input, end); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/coding/varint.h b/library/cpp/yt/coding/varint.h new file mode 100644 index 00000000000..c5399f8b069 --- /dev/null +++ b/library/cpp/yt/coding/varint.h @@ -0,0 +1,56 @@ +#pragma once + +#include <library/cpp/yt/exception/exception.h> + +#include <util/system/defaults.h> + +#include <util/stream/input.h> +#include <util/stream/output.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +constexpr size_t MaxVarInt64Size = (8 * sizeof(ui64) - 1) / 7 + 1; +constexpr size_t MaxVarUint64Size = (8 * sizeof(ui64) - 1) / 7 + 1; + +constexpr size_t MaxVarInt32Size = (8 * sizeof(ui32) - 1) / 7 + 1; +constexpr size_t MaxVarUint32Size = (8 * sizeof(ui32) - 1) / 7 + 1; + +// Various functions to read/write varints. + +// Returns the number of bytes written. +int WriteVarUint64(IOutputStream* output, ui64 value); +int WriteVarUint32(IOutputStream* output, ui32 value); +int WriteVarInt32(IOutputStream* output, i32 value); +int WriteVarInt64(IOutputStream* output, i64 value); + +int WriteVarUint64(char* output, ui64 value); +int WriteVarUint32(char* output, ui32 value); +int WriteVarInt32(char* output, i32 value); +int WriteVarInt64(char* output, i64 value); + +// Returns the number of bytes read. +int ReadVarUint64(IInputStream* input, ui64* value); +int ReadVarUint32(IInputStream* input, ui32* value); +int ReadVarInt32(IInputStream* input, i32* value); +int ReadVarInt64(IInputStream* input, i64* value); + +int ReadVarUint64(const char* input, ui64* value); +int ReadVarUint32(const char* input, ui32* value); +int ReadVarInt32(const char* input, i32* value); +int ReadVarInt64(const char* input, i64* value); + +// Throws exception if integer is not complete when `end' is reached. +int ReadVarUint64(const char* input, const char* end, ui64* value); +int ReadVarUint32(const char* input, const char* end, ui32* value); +int ReadVarInt32(const char* input, const char* end, i32* value); +int ReadVarInt64(const char* input, const char* end, i64* value); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define VARINT_INL_H_ +#include "varint-inl.h" +#undef VARINT_INL_H_ diff --git a/library/cpp/yt/coding/ya.make b/library/cpp/yt/coding/ya.make new file mode 100644 index 00000000000..3dae919e57f --- /dev/null +++ b/library/cpp/yt/coding/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( +) + +PEERDIR( + library/cpp/yt/exception +) + +END() + +RECURSE_FOR_TESTS(unittests) diff --git a/library/cpp/yt/coding/zig_zag-inl.h b/library/cpp/yt/coding/zig_zag-inl.h new file mode 100644 index 00000000000..c611f7e1d46 --- /dev/null +++ b/library/cpp/yt/coding/zig_zag-inl.h @@ -0,0 +1,40 @@ +#ifndef ZIG_ZAG_INL_H_ +#error "Direct inclusion of this file is not allowed, include zig_zag.h" +// For the sake of sane code completion. +#include "zig_zag.h" +#endif +#undef ZIG_ZAG_INL_H_ + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +inline ui32 ZigZagEncode32(i32 n) +{ + // Note: the right-shift must be arithmetic. + // Note: left shift must be unsigned because of overflow. + return (static_cast<ui32>(n) << 1) ^ static_cast<ui32>(n >> 31); +} + +inline i32 ZigZagDecode32(ui32 n) +{ + // Note: using unsigned types prevent undefined behavior. + return static_cast<i32>((n >> 1) ^ (~(n & 1) + 1)); +} + +inline ui64 ZigZagEncode64(i64 n) +{ + // Note: the right-shift must be arithmetic. + // Note: left shift must be unsigned because of overflow. + return (static_cast<ui64>(n) << 1) ^ static_cast<ui64>(n >> 63); +} + +inline i64 ZigZagDecode64(ui64 n) +{ + // Note: using unsigned types prevent undefined behavior. + return static_cast<i64>((n >> 1) ^ (~(n & 1) + 1)); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/coding/zig_zag.h b/library/cpp/yt/coding/zig_zag.h new file mode 100644 index 00000000000..aa6d425a1c0 --- /dev/null +++ b/library/cpp/yt/coding/zig_zag.h @@ -0,0 +1,24 @@ +#pragma once + +#include <util/system/types.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +// These Functions provide coding of integers with property: 0 <= f(x) <= 2 * |x| +// Actually taken 'as is' from protobuf/wire_format_lite.h + +ui32 ZigZagEncode32(i32 n); +i32 ZigZagDecode32(ui32 n); + +ui64 ZigZagEncode64(i64 n); +i64 ZigZagDecode64(ui64 n); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define ZIG_ZAG_INL_H_ +#include "zig_zag-inl.h" +#undef ZIG_ZAG_INL_H_ |