aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/compproto/bit.h
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:17 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:17 +0300
commitd3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch)
treedd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/compproto/bit.h
parent72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff)
downloadydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/compproto/bit.h')
-rw-r--r--library/cpp/compproto/bit.h654
1 files changed, 327 insertions, 327 deletions
diff --git a/library/cpp/compproto/bit.h b/library/cpp/compproto/bit.h
index 7d2800843f..6a421b65f7 100644
--- a/library/cpp/compproto/bit.h
+++ b/library/cpp/compproto/bit.h
@@ -3,346 +3,346 @@
#include <util/generic/array_ref.h>
#include <util/generic/vector.h>
#include <util/stream/output.h>
-#include <util/stream/input.h>
+#include <util/stream/input.h>
#include "huff.h"
#include "compressor.h"
-#include "metainfo.h"
+#include "metainfo.h"
namespace NCompProto {
- struct TBitBuffer {
- TVector<ui8> Out;
- ui64 Position;
- ui64 Size;
- ui8 Counter;
- TBitBuffer() {
- Clear();
- }
-
- static ui64 Read(const ui8* out, ui64 position, size_t size) {
- const ui8* dst = out + (size_t(position >> 3));
- ui64 outCode = *reinterpret_cast<const ui64*>(dst);
- ui8 shift = position & 7;
- return ((1ULL << size) - 1) & (outCode >> shift);
- }
-
- ui64 Read(ui64 position, size_t size) {
- return Read(&Out[0], position, size);
- }
-
- void Code(ui64 value, size_t size) {
- if (++Counter == 0) {
- Junk(257 * 64);
- }
- Position = Code(value, size, Position);
- }
- ui64 Code(ui64 value, size_t size, ui64 position) {
- ui8* dst = &Out[size_t(position >> 3)];
- ui64& outCode = *(ui64*)dst;
- ui8 shift = position & 7;
- ui64 mask = ((1ULL << size) - 1) << shift;
- outCode = ((value << shift) & mask) | (outCode & ~mask);
- return position + size;
- }
- void Junk(size_t junk = 1024) {
- size_t need = size_t(Position >> 3);
- if (Out.size() * 8 < Position + junk) {
- Out.resize(((need + junk) * 3) / 2);
- Size = Out.size();
- }
- }
- void Insert(ui64 value, ui64 position, size_t size) {
- ui64 newVal = Read(position, 56);
- position = Code(value, size, position);
- value = newVal;
-
- while (position < Position + 64) {
- newVal = Read(position + 56 - size, 56);
- position = Code(value, 56, position);
- value = newVal;
- }
-
- Position += size;
- }
-
- size_t ByteLength() const {
- return (Position + 7) / 8;
- }
-
- void ResetPosition() {
- Position = 0;
- Size = 0;
- }
-
- void Clear() {
- Counter = 0;
- Position = 0;
- Size = 0;
- Out.clear();
- Junk();
- }
+ struct TBitBuffer {
+ TVector<ui8> Out;
+ ui64 Position;
+ ui64 Size;
+ ui8 Counter;
+ TBitBuffer() {
+ Clear();
+ }
+
+ static ui64 Read(const ui8* out, ui64 position, size_t size) {
+ const ui8* dst = out + (size_t(position >> 3));
+ ui64 outCode = *reinterpret_cast<const ui64*>(dst);
+ ui8 shift = position & 7;
+ return ((1ULL << size) - 1) & (outCode >> shift);
+ }
+
+ ui64 Read(ui64 position, size_t size) {
+ return Read(&Out[0], position, size);
+ }
+
+ void Code(ui64 value, size_t size) {
+ if (++Counter == 0) {
+ Junk(257 * 64);
+ }
+ Position = Code(value, size, Position);
+ }
+ ui64 Code(ui64 value, size_t size, ui64 position) {
+ ui8* dst = &Out[size_t(position >> 3)];
+ ui64& outCode = *(ui64*)dst;
+ ui8 shift = position & 7;
+ ui64 mask = ((1ULL << size) - 1) << shift;
+ outCode = ((value << shift) & mask) | (outCode & ~mask);
+ return position + size;
+ }
+ void Junk(size_t junk = 1024) {
+ size_t need = size_t(Position >> 3);
+ if (Out.size() * 8 < Position + junk) {
+ Out.resize(((need + junk) * 3) / 2);
+ Size = Out.size();
+ }
+ }
+ void Insert(ui64 value, ui64 position, size_t size) {
+ ui64 newVal = Read(position, 56);
+ position = Code(value, size, position);
+ value = newVal;
+
+ while (position < Position + 64) {
+ newVal = Read(position + 56 - size, 56);
+ position = Code(value, 56, position);
+ value = newVal;
+ }
+
+ Position += size;
+ }
+
+ size_t ByteLength() const {
+ return (Position + 7) / 8;
+ }
+
+ void ResetPosition() {
+ Position = 0;
+ Size = 0;
+ }
+
+ void Clear() {
+ Counter = 0;
+ Position = 0;
+ Size = 0;
+ Out.clear();
+ Junk();
+ }
TArrayRef<const char> AsDataRegion() const {
return TArrayRef<const char>(reinterpret_cast<const char*>(Out.data()), ByteLength());
- }
- };
-
- struct THuff {
- TCoder Coder;
- ui64 Position;
- THuff() {
- Position = (ui64)(-1);
- }
- void Load(IInputStream& stream) {
- TString name;
- Coder.InitDefault();
- Coder.Entries.clear();
- while (1) {
- stream >> name;
- if (name == "end")
- return;
- else if (name == "entry") {
- TCoderEntry entry;
- ui32 value;
- stream >> value;
- entry.MinValue = value;
- stream >> value;
- entry.Prefix = value;
- stream >> value;
- entry.PrefixBits = value;
- stream >> value;
- entry.AllBits = value;
- Coder.Entries.push_back(entry);
- }
- }
- }
-
- void Save(IOutputStream& stream, TString offset) {
- TString step = " ";
- for (size_t i = 0; i < Coder.Entries.size(); ++i) {
- stream << offset << step << "entry ";
- stream << (ui32)Coder.Entries[i].MinValue << " ";
- stream << (ui32)Coder.Entries[i].Prefix << " ";
- stream << (ui32)Coder.Entries[i].PrefixBits << " ";
- stream << (ui32)Coder.Entries[i].AllBits << " ";
- stream << Endl;
+ }
+ };
+
+ struct THuff {
+ TCoder Coder;
+ ui64 Position;
+ THuff() {
+ Position = (ui64)(-1);
+ }
+ void Load(IInputStream& stream) {
+ TString name;
+ Coder.InitDefault();
+ Coder.Entries.clear();
+ while (1) {
+ stream >> name;
+ if (name == "end")
+ return;
+ else if (name == "entry") {
+ TCoderEntry entry;
+ ui32 value;
+ stream >> value;
+ entry.MinValue = value;
+ stream >> value;
+ entry.Prefix = value;
+ stream >> value;
+ entry.PrefixBits = value;
+ stream >> value;
+ entry.AllBits = value;
+ Coder.Entries.push_back(entry);
+ }
+ }
+ }
+
+ void Save(IOutputStream& stream, TString offset) {
+ TString step = " ";
+ for (size_t i = 0; i < Coder.Entries.size(); ++i) {
+ stream << offset << step << "entry ";
+ stream << (ui32)Coder.Entries[i].MinValue << " ";
+ stream << (ui32)Coder.Entries[i].Prefix << " ";
+ stream << (ui32)Coder.Entries[i].PrefixBits << " ";
+ stream << (ui32)Coder.Entries[i].AllBits << " ";
+ stream << Endl;
+ }
+ stream << offset << "end" << Endl;
+ }
+
+ void BeginElement(TBitBuffer& out) {
+ Position = out.Position;
+ }
+ void Add(ui32 value, TBitBuffer& out) {
+ size_t val = 0;
+ ui64 code = Coder.Code(value, val);
+ out.Code(code, val);
+ }
+ void AddDelayed(ui32 value, TBitBuffer& out) {
+ size_t val = 0;
+ ui64 code = Coder.Code(value, val);
+ if (Position == (ui64)(-1)) {
+ ythrow yexception() << "Position == (ui64)(-1)";
+ }
+ out.Insert(code, Position, val);
+ out.Junk();
+ }
+ };
+
+ struct THist {
+ TAccum Accum;
+ THist() {
+ }
+
+ void Load(IInputStream& stream) {
+ TString name;
+ while (1) {
+ stream >> name;
+ if (name == "end")
+ return;
}
- stream << offset << "end" << Endl;
- }
-
- void BeginElement(TBitBuffer& out) {
- Position = out.Position;
- }
- void Add(ui32 value, TBitBuffer& out) {
- size_t val = 0;
- ui64 code = Coder.Code(value, val);
- out.Code(code, val);
- }
- void AddDelayed(ui32 value, TBitBuffer& out) {
- size_t val = 0;
- ui64 code = Coder.Code(value, val);
- if (Position == (ui64)(-1)) {
- ythrow yexception() << "Position == (ui64)(-1)";
- }
- out.Insert(code, Position, val);
- out.Junk();
- }
- };
-
- struct THist {
- TAccum Accum;
- THist() {
- }
-
- void Load(IInputStream& stream) {
- TString name;
- while (1) {
- stream >> name;
- if (name == "end")
- return;
- }
- }
- // TODO: why not const TString& ???
- void Save(IOutputStream& /*stream*/, TString /*offset*/) {
- }
-
- void Add(ui32 value, TEmpty& /*empty*/) {
- Accum.Add(value);
- }
- void AddDelayed(ui32 value, TEmpty& /*empty*/) {
- Accum.Add(value);
- }
- void BeginElement(TEmpty& /*empty*/) {
- }
- };
-
- struct THistToHuff {
- static THistToHuff Instance() {
- return THistToHuff();
- }
- TEmpty Build() const {
- return TEmpty();
- }
- void Build(THuff& info, const THist& hist) const {
- Analyze(hist.Accum, info.Coder.Entries);
- info.Coder.Normalize();
- }
- };
-
- struct IDecompressor {
- // sequentially decompresses whole structure according to metainfo, starts at position offset
- virtual void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) = 0;
- // decompresses one record of outer repeated structure starting at position offset
- virtual void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) = 0;
- virtual ~IDecompressor() = default;
- };
-
- template <class X>
- struct TMetaIterator: public IDecompressor {
- X Self;
- TMetaIterator() {
- Self.Parent = this;
- }
-
- private:
- inline void DecompressSingle(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
- Self.BeginElement(repeatedIndex);
- ui32 mask = table->Mask.Decompress(codes, offset);
- size_t index = 0;
- ui32 scalarMask = table->ScalarMask;
- while (mask || scalarMask) {
- if (mask & 1) {
- if (scalarMask & 1) {
- ui32 val = table->Scalar[index].Decompress(codes, offset);
- Self.SetScalar(index, val);
- } else {
- Self.GetDecompressor(index).Decompress(table->Repeated[index].Get(), codes, offset);
- }
- } else if ((scalarMask & 1) && table->Default[index].Type == TScalarDefaultValue::Fixed) {
- Self.SetScalar(index, table->Default[index].Value);
- }
- scalarMask = scalarMask >> 1;
- mask = mask >> 1;
- ++index;
- }
- Self.EndElement();
- }
-
- inline void DecompressSingleScalarsOnly(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
- Self.BeginElement(repeatedIndex);
- ui32 mask = table->Mask.Decompress(codes, offset);
- ui32 scalarMask = table->ScalarMask;
- size_t index = 0;
- while (scalarMask) {
- if (mask & 1) {
+ }
+ // TODO: why not const TString& ???
+ void Save(IOutputStream& /*stream*/, TString /*offset*/) {
+ }
+
+ void Add(ui32 value, TEmpty& /*empty*/) {
+ Accum.Add(value);
+ }
+ void AddDelayed(ui32 value, TEmpty& /*empty*/) {
+ Accum.Add(value);
+ }
+ void BeginElement(TEmpty& /*empty*/) {
+ }
+ };
+
+ struct THistToHuff {
+ static THistToHuff Instance() {
+ return THistToHuff();
+ }
+ TEmpty Build() const {
+ return TEmpty();
+ }
+ void Build(THuff& info, const THist& hist) const {
+ Analyze(hist.Accum, info.Coder.Entries);
+ info.Coder.Normalize();
+ }
+ };
+
+ struct IDecompressor {
+ // sequentially decompresses whole structure according to metainfo, starts at position offset
+ virtual void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) = 0;
+ // decompresses one record of outer repeated structure starting at position offset
+ virtual void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) = 0;
+ virtual ~IDecompressor() = default;
+ };
+
+ template <class X>
+ struct TMetaIterator: public IDecompressor {
+ X Self;
+ TMetaIterator() {
+ Self.Parent = this;
+ }
+
+ private:
+ inline void DecompressSingle(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
+ Self.BeginElement(repeatedIndex);
+ ui32 mask = table->Mask.Decompress(codes, offset);
+ size_t index = 0;
+ ui32 scalarMask = table->ScalarMask;
+ while (mask || scalarMask) {
+ if (mask & 1) {
+ if (scalarMask & 1) {
+ ui32 val = table->Scalar[index].Decompress(codes, offset);
+ Self.SetScalar(index, val);
+ } else {
+ Self.GetDecompressor(index).Decompress(table->Repeated[index].Get(), codes, offset);
+ }
+ } else if ((scalarMask & 1) && table->Default[index].Type == TScalarDefaultValue::Fixed) {
+ Self.SetScalar(index, table->Default[index].Value);
+ }
+ scalarMask = scalarMask >> 1;
+ mask = mask >> 1;
+ ++index;
+ }
+ Self.EndElement();
+ }
+
+ inline void DecompressSingleScalarsOnly(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
+ Self.BeginElement(repeatedIndex);
+ ui32 mask = table->Mask.Decompress(codes, offset);
+ ui32 scalarMask = table->ScalarMask;
+ size_t index = 0;
+ while (scalarMask) {
+ if (mask & 1) {
ui32 val = table->Scalar[index].Decompress(codes, offset);
Self.SetScalar(index, val);
- } else if (table->Default[index].Type == TScalarDefaultValue::Fixed) {
- Self.SetScalar(index, table->Default[index].Value);
+ } else if (table->Default[index].Type == TScalarDefaultValue::Fixed) {
+ Self.SetScalar(index, table->Default[index].Value);
}
- mask = mask >> 1;
- scalarMask = scalarMask >> 1;
- ++index;
+ mask = mask >> 1;
+ scalarMask = scalarMask >> 1;
+ ++index;
}
- Self.EndElement();
- }
-
- public:
- void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) override {
- ui64 locOffset = offset;
- ui32 count = table->Count.Decompress(codes, locOffset);
- ui32 repeatedIndex = (ui32)(-1);
- Self.BeginSelf(count, table->Id);
- if (table->RepeatedMask) {
- for (ui32 i = 0; i < count; ++i) {
- repeatedIndex += table->Index.Decompress(codes, locOffset);
- DecompressSingle(repeatedIndex, table, codes, locOffset);
- }
- } else {
- for (ui32 i = 0; i < count; ++i) {
- repeatedIndex += table->Index.Decompress(codes, locOffset);
- DecompressSingleScalarsOnly(repeatedIndex, table, codes, locOffset);
- }
+ Self.EndElement();
+ }
+
+ public:
+ void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) override {
+ ui64 locOffset = offset;
+ ui32 count = table->Count.Decompress(codes, locOffset);
+ ui32 repeatedIndex = (ui32)(-1);
+ Self.BeginSelf(count, table->Id);
+ if (table->RepeatedMask) {
+ for (ui32 i = 0; i < count; ++i) {
+ repeatedIndex += table->Index.Decompress(codes, locOffset);
+ DecompressSingle(repeatedIndex, table, codes, locOffset);
+ }
+ } else {
+ for (ui32 i = 0; i < count; ++i) {
+ repeatedIndex += table->Index.Decompress(codes, locOffset);
+ DecompressSingleScalarsOnly(repeatedIndex, table, codes, locOffset);
+ }
}
- offset = locOffset;
- Self.EndSelf();
- }
-
- // XXX: iterator needed?
- //
- // Following two functions serves the purpose of decompressing outer repeated-structure(such structure is mandatory now).
- // They can work for inner repeated structures too, if you supply correct table, codes and offset parameters.
- void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) override {
- table->Index.Decompress(codes, offset);
- DecompressSingle(prevIndex, table, codes, offset);
- }
-
- ui32 DecompressCount(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
- return table->Count.Decompress(codes, offset);
- }
- };
-
- template <class X>
- struct TParentHold {
- TMetaIterator<X>* Parent;
- TParentHold()
- : Parent(nullptr)
- {
- }
- };
-
- struct TEmptyDecompressor: public TParentHold<TEmptyDecompressor> {
- void BeginSelf(ui32 /*count*/, ui32 /*id*/) {
- }
- void EndSelf() {
- }
- void BeginElement(ui32 /*element*/) {
- }
- void EndElement() {
- }
- void SetScalar(size_t /*index*/, ui32 /*val*/) {
- }
- TMetaIterator<TEmptyDecompressor>& GetDecompressor(size_t index) {
- Y_UNUSED(index);
- return *Parent;
- }
- };
-
- inline TMetaIterator<TEmptyDecompressor>& GetEmptyDecompressor() {
- static TMetaIterator<TEmptyDecompressor> empty;
- return empty;
+ offset = locOffset;
+ Self.EndSelf();
+ }
+
+ // XXX: iterator needed?
+ //
+ // Following two functions serves the purpose of decompressing outer repeated-structure(such structure is mandatory now).
+ // They can work for inner repeated structures too, if you supply correct table, codes and offset parameters.
+ void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) override {
+ table->Index.Decompress(codes, offset);
+ DecompressSingle(prevIndex, table, codes, offset);
+ }
+
+ ui32 DecompressCount(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) {
+ return table->Count.Decompress(codes, offset);
+ }
+ };
+
+ template <class X>
+ struct TParentHold {
+ TMetaIterator<X>* Parent;
+ TParentHold()
+ : Parent(nullptr)
+ {
+ }
+ };
+
+ struct TEmptyDecompressor: public TParentHold<TEmptyDecompressor> {
+ void BeginSelf(ui32 /*count*/, ui32 /*id*/) {
+ }
+ void EndSelf() {
+ }
+ void BeginElement(ui32 /*element*/) {
+ }
+ void EndElement() {
+ }
+ void SetScalar(size_t /*index*/, ui32 /*val*/) {
+ }
+ TMetaIterator<TEmptyDecompressor>& GetDecompressor(size_t index) {
+ Y_UNUSED(index);
+ return *Parent;
+ }
+ };
+
+ inline TMetaIterator<TEmptyDecompressor>& GetEmptyDecompressor() {
+ static TMetaIterator<TEmptyDecompressor> empty;
+ return empty;
}
- struct THuffToTable {
- static THuffToTable Instance() {
- return THuffToTable();
- }
- void Build(TTable& table, const THuff& huff) const {
- // can happen if a field was never serialized across whole structure
- if (huff.Coder.Entries.empty())
- return;
- for (ui32 i = 0; i < 64; ++i) {
- const TCoderEntry& entry = huff.Coder.GetEntry(i, table.Id[i]);
- table.CodeBase[i] = entry.MinValue;
- table.PrefLength[i] = entry.PrefixBits;
- table.CodeMask[i] = (1ULL << (entry.AllBits - entry.PrefixBits)) - 1ULL;
- table.Length[i] = entry.AllBits;
- }
- }
- };
-
- struct THuffToTableWithDecompressor: private THuffToTable {
- TSimpleSharedPtr<IDecompressor> Decompressor;
- THuffToTableWithDecompressor(TSimpleSharedPtr<IDecompressor> decompressor)
- : Decompressor(decompressor)
- {
- }
- TSimpleSharedPtr<IDecompressor> Build() const {
- return Decompressor;
- }
- void Build(TTable& table, const THuff& huff) const {
- THuffToTable::Build(table, huff);
- }
- };
-
-}
+ struct THuffToTable {
+ static THuffToTable Instance() {
+ return THuffToTable();
+ }
+ void Build(TTable& table, const THuff& huff) const {
+ // can happen if a field was never serialized across whole structure
+ if (huff.Coder.Entries.empty())
+ return;
+ for (ui32 i = 0; i < 64; ++i) {
+ const TCoderEntry& entry = huff.Coder.GetEntry(i, table.Id[i]);
+ table.CodeBase[i] = entry.MinValue;
+ table.PrefLength[i] = entry.PrefixBits;
+ table.CodeMask[i] = (1ULL << (entry.AllBits - entry.PrefixBits)) - 1ULL;
+ table.Length[i] = entry.AllBits;
+ }
+ }
+ };
+
+ struct THuffToTableWithDecompressor: private THuffToTable {
+ TSimpleSharedPtr<IDecompressor> Decompressor;
+ THuffToTableWithDecompressor(TSimpleSharedPtr<IDecompressor> decompressor)
+ : Decompressor(decompressor)
+ {
+ }
+ TSimpleSharedPtr<IDecompressor> Build() const {
+ return Decompressor;
+ }
+ void Build(TTable& table, const THuff& huff) const {
+ THuffToTable::Build(table, huff);
+ }
+ };
+
+}