diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/bit_io | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/bit_io')
-rw-r--r-- | library/cpp/bit_io/bitinout_ut.cpp | 279 | ||||
-rw-r--r-- | library/cpp/bit_io/bitinput.cpp | 1 | ||||
-rw-r--r-- | library/cpp/bit_io/bitinput.h | 171 | ||||
-rw-r--r-- | library/cpp/bit_io/bitinput_impl.cpp | 1 | ||||
-rw-r--r-- | library/cpp/bit_io/bitinput_impl.h | 110 | ||||
-rw-r--r-- | library/cpp/bit_io/bitoutput.cpp | 1 | ||||
-rw-r--r-- | library/cpp/bit_io/bitoutput.h | 195 | ||||
-rw-r--r-- | library/cpp/bit_io/ut/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/bit_io/ya.make | 18 |
9 files changed, 788 insertions, 0 deletions
diff --git a/library/cpp/bit_io/bitinout_ut.cpp b/library/cpp/bit_io/bitinout_ut.cpp new file mode 100644 index 0000000000..23a1ddf344 --- /dev/null +++ b/library/cpp/bit_io/bitinout_ut.cpp @@ -0,0 +1,279 @@ +#include "bitinput.h" +#include "bitoutput.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/stream/buffer.h> +#include <util/generic/buffer.h> + +namespace NBitIO { + static const char BITS_REF[] = + "00100010 01000000 00000000 00100111 11011111 01100111 11010101 00010100 " + "00100010 01100011 11100011 00110000 11011011 11011111 01001100 00110101 " + "10011110 01011111 01010000 00000110 00011011 00100110 00010100 01110011 " + "00001010 10101010 10101010 10101010 10101010 10101010 10101010 10101010 " + "10110101 01010101 01010101 01010101 01010101 01010101 01010101 01010101 " + "01000000"; + + inline ui64 Bits(ui64 bytes) { + return bytes << 3ULL; + } + + inline TString PrintBits(const char* a, const char* b, bool reverse = false) { + TString s; + TStringOutput out(s); + for (const char* it = a; it != b; ++it) { + if (it != a) + out << ' '; + + ui8 byte = *it; + + if (reverse) + byte = ReverseBits(byte); + + for (ui32 mask = 1; mask < 0xff; mask <<= 1) { + out << ((byte & mask) ? '1' : '0'); + } + } + + return s; + } + + template <typename T> + inline TString PrintBits(T t, ui32 bits = Bits(sizeof(T))) { + return PrintBits((char*)&t, ((char*)&t) + BytesUp(bits)); + } +} + +class TBitIOTest: public TTestBase { + UNIT_TEST_SUITE(TBitIOTest); + UNIT_TEST(TestBitIO) + UNIT_TEST_SUITE_END(); + +private: + using TBi = NBitIO::TBitInput; + using TVec = TVector<char>; + + void static CheckBits(const TVec& v, const TString& ref, const TString& rem) { + UNIT_ASSERT_VALUES_EQUAL_C(NBitIO::PrintBits(v.begin(), v.end()), ref, rem); + } + + void DoRead(TBi& b, ui32& t) { + b.Read(t, 1, 0); // 1 + b.ReadK<3>(t, 1); // 4 + b.Read(t, 5, 4); // 9 + b.ReadK<14>(t, 9); // 23 + b.Read(t, 1, 23); // 24 + b.ReadK<5>(t, 24); // 29 + b.Read(t, 3, 29); // 32 + } + + template <typename TBo> + void DoWrite(TBo& b, ui32 t) { + b.Write(t, 1, 0); //1 + b.Write(t, 3, 1); //4 + b.Write(t, 5, 4); //9 + b.Write(t, 14, 9); //23 + b.Write(t, 1, 23); //24 + b.Write(t, 5, 24); //29 + b.Write(t, 3, 29); //32 + } + + template <typename TBo> + void DoWrite1(TBo& out, const TString& rem) { + out.Write(0x0C, 3); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 1u, (rem + ", " + ToString(__LINE__))); + out.Write(0x18, 4); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 1u, (rem + ", " + ToString(__LINE__))); + out.Write(0x0C, 3); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 2u, (rem + ", " + ToString(__LINE__))); + out.Write(0x30000, 17); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 4u, (rem + ", " + ToString(__LINE__))); + out.Write(0x0C, 3); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 4u, (rem + ", " + ToString(__LINE__))); + } + + template <typename TBo> + void DoWrite2(TBo& out, const TString& rem) { + out.Write(0x0C, 3); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 8u, (rem + ", " + ToString(__LINE__))); + + out.Write(0x42, 7); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 9u, (rem + ", " + ToString(__LINE__))); + + DoWrite(out, 1637415112); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 13u, (rem + ", " + ToString(__LINE__))); + + DoWrite(out, 897998715); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 17u, (rem + ", " + ToString(__LINE__))); + + DoWrite(out, 201416527); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 21u, (rem + ", " + ToString(__LINE__))); + + DoWrite(out, 432344219); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 25u, (rem + ", " + ToString(__LINE__))); + + out.Write(0xAAAAAAAAAAAAAAAAULL, 64); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 33u, (rem + ", " + ToString(__LINE__))); + + out.Write(0x5555555555555555ULL, 64); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 41u, (rem + ", " + ToString(__LINE__))); + } + + void DoBitOutput(NBitIO::TBitOutputYVector& out, const TString& rem) { + DoWrite1(out, rem); + + out.WriteWords<8>(0xabcdef); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 8u, (rem + ", " + ToString(__LINE__))); + + DoWrite2(out, rem); + } + + void DoBitOutput(NBitIO::TBitOutputArray& out, const TString& rem) { + DoWrite1(out, rem); + + out.WriteWords<8>(0xabcdef); + UNIT_ASSERT_VALUES_EQUAL_C(out.GetOffset(), 8u, (rem + ", " + ToString(__LINE__))); + + DoWrite2(out, rem); + } + + void DoBitInput(TBi& in, const TString& rem) { + UNIT_ASSERT(!in.Eof()); + + { + ui64 val; + + val = 0; + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 0u, (rem + ": " + NBitIO::PrintBits(val))); + + UNIT_ASSERT_C(in.Read(val, 3), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 1u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + UNIT_ASSERT_C(in.Read(val, 4), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x8u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 1u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + UNIT_ASSERT_C(in.Read(val, 3), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 2u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + UNIT_ASSERT_C(in.Read(val, 17), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x10000u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 4u, (rem + ": " + NBitIO::PrintBits(val))); + + { + ui32 rt = 0; + in.ReadRandom(30, rt, 10, 20); + UNIT_ASSERT_STRINGS_EQUAL(NBitIO::PrintBits(rt).data(), "00000000 00000000 00001111 01111100"); + } + val = 0; + UNIT_ASSERT_C(in.Read(val, 3), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + UNIT_ASSERT_C(in.ReadWords<8>(val), (rem + ": " + NBitIO::PrintBits(val)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0xabcdefU, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 8u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + UNIT_ASSERT_C(in.Read(val, 3), (rem + ", " + ToString(__LINE__)).data()); + + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x4u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 8u, (rem + ": " + NBitIO::PrintBits(val))); + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + + val = 0; + in.Read(val, 7); + UNIT_ASSERT_VALUES_EQUAL_C(val, 0x42u, (rem + ": " + NBitIO::PrintBits(val))); + } + + { + ui32 v = 0; + + DoRead(in, v); + UNIT_ASSERT_VALUES_EQUAL_C(v, 1637415112ul, (rem + ": " + NBitIO::PrintBits(v))); + DoRead(in, v); + UNIT_ASSERT_VALUES_EQUAL_C(v, 897998715u, (rem + ": " + NBitIO::PrintBits(v))); + DoRead(in, v); + UNIT_ASSERT_VALUES_EQUAL_C(v, 201416527u, (rem + ": " + NBitIO::PrintBits(v))); + DoRead(in, v); + UNIT_ASSERT_VALUES_EQUAL_C(v, 432344219u, (rem + ": " + NBitIO::PrintBits(v))); + + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 25u, (rem + ": " + NBitIO::PrintBits(v))); + } + + { + ui64 v8 = 0; + in.ReadSafe(v8, 64); + + UNIT_ASSERT_VALUES_EQUAL_C(v8, 0xAAAAAAAAAAAAAAAAULL, (rem + ": " + NBitIO::PrintBits(v8))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 33u, (rem + ": " + NBitIO::PrintBits(v8))); + + v8 = 0; + in.ReadK<64>(v8); + + UNIT_ASSERT_VALUES_EQUAL_C(v8, 0x5555555555555555ULL, (rem + ": " + NBitIO::PrintBits(v8))); + UNIT_ASSERT_VALUES_EQUAL_C(in.GetOffset(), 41u, (rem + ": " + NBitIO::PrintBits(v8))); + } + + ui32 v = 0; + UNIT_ASSERT_C(!in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + UNIT_ASSERT_C(in.Read(v, 5), (rem + ", " + ToString(__LINE__)).data()); + UNIT_ASSERT_C(in.Eof(), (rem + ", " + ToString(__LINE__)).data()); + } + + void TestBitIO() { + { + TVec vec; + + { + NBitIO::TBitOutputYVector out(&vec); + DoBitOutput(out, ToString(__LINE__)); + } + + CheckBits(vec, NBitIO::BITS_REF, ToString(__LINE__).data()); + + { + TBi in(vec); + DoBitInput(in, ToString(__LINE__)); + } + } + { + TVec vec; + vec.resize(41, 0); + { + NBitIO::TBitOutputArray out(vec.begin(), vec.size()); + DoBitOutput(out, ToString(__LINE__)); + } + + CheckBits(vec, NBitIO::BITS_REF, ToString(__LINE__).data()); + + { + TBi in(vec); + DoBitInput(in, ToString(__LINE__)); + } + } + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TBitIOTest); diff --git a/library/cpp/bit_io/bitinput.cpp b/library/cpp/bit_io/bitinput.cpp new file mode 100644 index 0000000000..d7c37f06fc --- /dev/null +++ b/library/cpp/bit_io/bitinput.cpp @@ -0,0 +1 @@ +#include "bitinput.h" diff --git a/library/cpp/bit_io/bitinput.h b/library/cpp/bit_io/bitinput.h new file mode 100644 index 0000000000..85711eb7f9 --- /dev/null +++ b/library/cpp/bit_io/bitinput.h @@ -0,0 +1,171 @@ +#pragma once + +#include "bitinput_impl.h" + +#include <util/system/yassert.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +#include <iterator> + +namespace NBitIO { + // Based on junk/solar/codecs/bitstream.h + + class TBitInput: protected TBitInputImpl { + public: + template <typename TVec> + explicit TBitInput(const TVec& vec) + : TBitInputImpl(std::begin(vec), std::end(vec)) + { + } + + TBitInput(const char* start, const char* end) + : TBitInputImpl(start, end) + { + } + + bool Eof() const { + return EofImpl(); + } + + ui64 GetOffset() const { + ui64 bo = BitOffset(); + return bo / 8 + !!(bo % 8); + } + + using TBitInputImpl::GetBitLength; + + ui64 GetBitOffset() const { + return BitOffset() % 8; + } + + public: + // Read with static number of bits. + // Preserves what's in result. + template <ui64 bits, typename T> + Y_FORCE_INLINE bool ReadK(T& result, ui64 skipbits) { + ui64 r64 = 0; + bool ret = bits <= 56 ? ReadKImpl<bits>(r64) : ReadSafe(r64, bits); + CopyToResultK<bits>(result, r64, skipbits); + return ret; + } + + // Read with static number of bits. + // Zeroes other bits in result. + template <ui64 bits, typename T> + Y_FORCE_INLINE bool ReadK(T& result) { + ui64 r = 0; + bool res = ReadK<bits>(r); + result = r; + return res; + } + + // Shortcut to impl. + template <ui64 bits> + Y_FORCE_INLINE bool ReadK(ui64& result) { + if (bits <= 56) + return ReadKImpl<bits>(result); + + ui64 r1 = 0ULL; + ui64 r2 = 0ULL; + + bool ret1 = ReadKImpl<56ULL>(r1); + bool ret2 = ReadKImpl<(bits > 56ULL ? bits - 56ULL : 0) /*or else we get negative param in template*/>(r2); + + result = (r2 << 56ULL) | r1; + + return ret1 & ret2; + } + + // It's safe to read up to 64 bits. + // Zeroes other bits in result. + template <typename T> + Y_FORCE_INLINE bool ReadSafe(T& result, ui64 bits) { + if (bits <= 56ULL) + return Read(result, bits); + + ui64 r1 = 0ULL; + ui64 r2 = 0ULL; + + bool ret1 = ReadKImpl<56ULL>(r1); + bool ret2 = ReadImpl(r2, bits - 56ULL); + + result = (r2 << 56ULL) | r1; + + return ret1 & ret2; + } + + // It's safe to read up to 64 bits. + // Preserves what's in result. + template <typename T> + Y_FORCE_INLINE bool ReadSafe(T& result, ui64 bits, ui64 skipbits) { + ui64 r64 = 0; + bool ret = ReadSafe(r64, bits); + CopyToResult(result, r64, bits, skipbits); + return ret; + } + + // Do not try to read more than 56 bits at once. Split in two reads or use ReadSafe. + // Zeroes other bits in result. + template <typename T> + Y_FORCE_INLINE bool Read(T& result, ui64 bits) { + ui64 r64 = 0; + bool ret = ReadImpl(r64, bits); + result = r64; + return ret; + } + + // Shortcut to impl. + Y_FORCE_INLINE bool Read(ui64& result, ui64 bits) { + return ReadImpl(result, bits); + } + + // Do not try to read more than 56 bits at once. Split in two reads or use ReadSafe. + // Preserves what's in result. + template <typename T> + Y_FORCE_INLINE bool Read(T& result, ui64 bits, ui64 skipbits) { + ui64 r64 = 0; + bool ret = ReadImpl(r64, bits); + CopyToResult(result, r64, bits, skipbits); + return ret; + } + + // Unsigned wordwise read. Underlying data is splitted in "words" of "bits(data) + 1(flag)" bits. + // Like this: (unsigned char)0x2E<3> (0010 1110) <=> 1110 0101 + // fddd fddd + template <ui64 bits, typename T> + Y_FORCE_INLINE bool ReadWords(T& result) { + ui64 r64 = 0; + + bool retCode = ReadWordsImpl<bits>(r64); + result = r64; + + return retCode; + } + + // Shortcut to impl. + template <ui64 bits> + Y_FORCE_INLINE bool ReadWords(ui64& result) { + return ReadWordsImpl<bits>(result); + } + + Y_FORCE_INLINE bool Back(int bits) { + return Seek(BitOffset() - bits); + } + + Y_FORCE_INLINE bool Seek(int bitoffset) { + return TBitInputImpl::Seek(bitoffset); + } + + // A way to read a portion of bits at random location. + // Didn't want to complicate sequential read, neither to copypaste. + template <typename T> + Y_FORCE_INLINE bool ReadRandom(ui64 bitoffset, T& result, ui64 bits, ui64 skipbits) { + const ui64 curr = BitOffset(); + Seek(bitoffset); + bool ret = ReadSafe<T>(result, bits, skipbits); + Seek(curr); + return ret; + } + }; +} diff --git a/library/cpp/bit_io/bitinput_impl.cpp b/library/cpp/bit_io/bitinput_impl.cpp new file mode 100644 index 0000000000..81c897f882 --- /dev/null +++ b/library/cpp/bit_io/bitinput_impl.cpp @@ -0,0 +1 @@ +#include "bitinput_impl.h" diff --git a/library/cpp/bit_io/bitinput_impl.h b/library/cpp/bit_io/bitinput_impl.h new file mode 100644 index 0000000000..b13fbef101 --- /dev/null +++ b/library/cpp/bit_io/bitinput_impl.h @@ -0,0 +1,110 @@ +#pragma once + +#include <util/generic/bitops.h> +#include <util/system/unaligned_mem.h> + +namespace NBitIO { + class TBitInputImpl { + i64 RealStart; + i64 Start; + ui64 Length; + ui64 BOffset; + const ui32 FakeStart; + char Fake[16]; + const i64 FStart; + + public: + TBitInputImpl(const char* start, const char* end) + : RealStart((i64)start) + , Start((i64)start) + , Length((end - start) << 3) + , BOffset(0) + , FakeStart(Length > 64 ? Length - 64 : 0) + , FStart((i64)Fake - (FakeStart >> 3)) + { + memcpy(Fake, (const char*)(RealStart + (FakeStart >> 3)), (Length - FakeStart) >> 3); + Start = FakeStart ? RealStart : FStart; + } + + ui64 GetBitLength() const { + return Length; + } + + protected: + template <ui32 bits> + Y_FORCE_INLINE bool ReadKImpl(ui64& result) { + result = (ReadUnaligned<ui64>((const void*)(Start + (BOffset >> 3))) >> (BOffset & 7)) & Mask64(bits); + BOffset += bits; + if (BOffset < FakeStart) + return true; + if (BOffset > Length) { + result = 0; + BOffset -= bits; + return false; + } + Start = FStart; + return true; + } + + Y_FORCE_INLINE bool ReadImpl(ui64& result, ui32 bits) { + result = (ReadUnaligned<ui64>((const void*)(Start + (BOffset >> 3))) >> (BOffset & 7)) & MaskLowerBits(bits); + BOffset += bits; + if (BOffset < FakeStart) + return true; + if (Y_UNLIKELY(BOffset > Length)) { + result = 0; + BOffset -= bits; + return false; + } + Start = FStart; + return true; + } + + Y_FORCE_INLINE bool EofImpl() const { + return BOffset >= Length; + } + + Y_FORCE_INLINE ui64 BitOffset() const { + return BOffset; + } + + Y_FORCE_INLINE bool Seek(i64 offset) { + if (offset < 0 || offset > (i64)Length) + return false; + BOffset = offset; + Start = BOffset < FakeStart ? RealStart : FStart; + return true; + } + + protected: + template <ui64 bits, typename T> + Y_FORCE_INLINE static void CopyToResultK(T& result, ui64 r64, ui64 skipbits) { + result = (result & ~(Mask64(bits) << skipbits)) | (r64 << skipbits); + } + + template <typename T> + Y_FORCE_INLINE static void CopyToResult(T& result, ui64 r64, ui64 bits, ui64 skipbits) { + result = (result & InverseMaskLowerBits(bits, skipbits)) | (r64 << skipbits); + } + + public: + template <ui64 bits> + Y_FORCE_INLINE bool ReadWordsImpl(ui64& data) { + data = 0; + + const ui64 haveMore = NthBit64(bits); + const ui64 mask = Mask64(bits); + ui64 current = 0; + ui64 byteNo = 0; + + do { + if (!ReadKImpl<bits + 1>(current)) + return false; + + data |= (current & mask) << (byteNo++ * bits); + } while (current & haveMore); + + return true; + } + }; +} diff --git a/library/cpp/bit_io/bitoutput.cpp b/library/cpp/bit_io/bitoutput.cpp new file mode 100644 index 0000000000..d6c1c095da --- /dev/null +++ b/library/cpp/bit_io/bitoutput.cpp @@ -0,0 +1 @@ +#include "bitoutput.h" diff --git a/library/cpp/bit_io/bitoutput.h b/library/cpp/bit_io/bitoutput.h new file mode 100644 index 0000000000..2b886c1f02 --- /dev/null +++ b/library/cpp/bit_io/bitoutput.h @@ -0,0 +1,195 @@ +#pragma once + +#include <library/cpp/deprecated/accessors/accessors.h> + +#include <util/stream/output.h> +#include <util/system/yassert.h> +#include <util/generic/bitops.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +namespace NBitIO { + // Based on junk/solar/codecs/bitstream.h + + // Almost all code is hard tuned for sequential write performance. + // Use tools/bursttrie/benchmarks/bitstreams_benchmark to check your changes. + + inline constexpr ui64 BytesUp(ui64 bits) { + return (bits + 7ULL) >> 3ULL; + } + + template <typename TStorage> + class TBitOutputBase { + protected: + TStorage* Storage; + ui64 FreeBits; + ui64 Active; + ui64 Offset; + + public: + TBitOutputBase(TStorage* storage) + : Storage(storage) + , FreeBits(64) + , Active() + , Offset() + { + } + + ui64 GetOffset() const { + return Offset + BytesUp(64ULL - FreeBits); + } + + ui64 GetBitOffset() const { + return (64ULL - FreeBits) & 7ULL; + } + + ui64 GetByteReminder() const { + return FreeBits & 7ULL; + } + + public: + // interface + + // Write "bits" lower bits. + Y_FORCE_INLINE void Write(ui64 data, ui64 bits) { + if (FreeBits < bits) { + if (FreeBits) { + bits -= FreeBits; + + Active |= (data & MaskLowerBits(FreeBits)) << (64ULL - FreeBits); + data >>= FreeBits; + + FreeBits = 0ULL; + } + + Flush(); + } + + Active |= bits ? ((data & MaskLowerBits(bits)) << (64ULL - FreeBits)) : 0; + FreeBits -= bits; + } + + // Write "bits" lower bits starting from "skipbits" bit. + Y_FORCE_INLINE void Write(ui64 data, ui64 bits, ui64 skipbits) { + Write(data >> skipbits, bits); + } + + // Unsigned wordwise write. Underlying data is splitted in "words" of "bits(data) + 1(flag)" bits. + // Like this: (unsigned char)0x2E<3> (0000 0010 1110) <=> 1110 0101 + // fddd fddd + template <ui64 bits> + Y_FORCE_INLINE void WriteWords(ui64 data) { + do { + ui64 part = data; + + data >>= bits; + part |= FastZeroIfFalse(data, NthBit64(bits)); + Write(part, bits + 1ULL); + } while (data); + } + + Y_FORCE_INLINE ui64 /* padded bits */ Flush() { + const ui64 ubytes = 8ULL - (FreeBits >> 3ULL); + + if (ubytes) { + Active <<= FreeBits; + Active >>= FreeBits; + + Storage->WriteData((const char*)&Active, (const char*)&Active + ubytes); + Offset += ubytes; + } + + const ui64 padded = FreeBits & 7; + + FreeBits = 64ULL; + Active = 0ULL; + + return padded; + } + + virtual ~TBitOutputBase() { + Flush(); + } + + private: + static Y_FORCE_INLINE ui64 FastZeroIfFalse(bool cond, ui64 iftrue) { + return -i64(cond) & iftrue; + } + }; + + template <typename TVec> + class TBitOutputVectorImpl { + TVec* Data; + + public: + void WriteData(const char* begin, const char* end) { + NAccessors::Append(*Data, begin, end); + } + + TBitOutputVectorImpl(TVec* data) + : Data(data) + { + } + }; + + template <typename TVec> + struct TBitOutputVector: public TBitOutputVectorImpl<TVec>, public TBitOutputBase<TBitOutputVectorImpl<TVec>> { + inline TBitOutputVector(TVec* data) + : TBitOutputVectorImpl<TVec>(data) + , TBitOutputBase<TBitOutputVectorImpl<TVec>>(this) + { + } + }; + + class TBitOutputArrayImpl { + char* Data; + size_t Left; + + public: + void WriteData(const char* begin, const char* end) { + size_t sz = end - begin; + Y_VERIFY(sz <= Left, " "); + memcpy(Data, begin, sz); + Data += sz; + Left -= sz; + } + + TBitOutputArrayImpl(char* begin, size_t len) + : Data(begin) + , Left(len) + { + } + }; + + struct TBitOutputArray: public TBitOutputArrayImpl, public TBitOutputBase<TBitOutputArrayImpl> { + inline TBitOutputArray(char* begin, size_t len) + : TBitOutputArrayImpl(begin, len) + , TBitOutputBase<TBitOutputArrayImpl>(this) + { + } + }; + + using TBitOutputYVector = TBitOutputVector<TVector<char>>; + + class TBitOutputStreamImpl { + IOutputStream* Out; + + public: + void WriteData(const char* begin, const char* end) { + Out->Write(begin, end - begin); + } + + TBitOutputStreamImpl(IOutputStream* out) + : Out(out) + { + } + }; + + struct TBitOutputStream: public TBitOutputStreamImpl, public TBitOutputBase<TBitOutputStreamImpl> { + inline TBitOutputStream(IOutputStream* out) + : TBitOutputStreamImpl(out) + , TBitOutputBase<TBitOutputStreamImpl>(this) + { + } + }; +} diff --git a/library/cpp/bit_io/ut/ya.make b/library/cpp/bit_io/ut/ya.make new file mode 100644 index 0000000000..07ee5b4997 --- /dev/null +++ b/library/cpp/bit_io/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/bit_io) + +OWNER( + velavokr + g:util +) + +SRCS( + bitinout_ut.cpp +) + +END() diff --git a/library/cpp/bit_io/ya.make b/library/cpp/bit_io/ya.make new file mode 100644 index 0000000000..df1de81ff9 --- /dev/null +++ b/library/cpp/bit_io/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +OWNER( + velavokr + g:util +) + +PEERDIR( + library/cpp/deprecated/accessors +) + +SRCS( + bitinput.cpp + bitinput_impl.cpp + bitoutput.cpp +) + +END() |