author    | Devtools Arcadia <arcadia-devtools@yandex-team.ru>           | 2022-02-07 18:08:42 +0300
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300
commit    | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree      | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/compproto
download  | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/compproto')
-rw-r--r-- | library/cpp/compproto/bit.h            | 348
-rw-r--r-- | library/cpp/compproto/compproto_ut.cpp | 543
-rw-r--r-- | library/cpp/compproto/compressor.h     |  74
-rw-r--r-- | library/cpp/compproto/huff.h           | 402
-rw-r--r-- | library/cpp/compproto/lib.cpp          |   4
-rw-r--r-- | library/cpp/compproto/metainfo.h       | 304
-rw-r--r-- | library/cpp/compproto/ut/ya.make       |  15
-rw-r--r-- | library/cpp/compproto/ya.make          |  13
8 files changed, 1703 insertions, 0 deletions
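
The patch below adds a schema-driven bit-level compressor: metainfo.h parses a plain-text description of the record structure (nested repeated groups and scalar fields with ids and optional constant defaults), huff.h derives Huffman-style integer codes from value statistics, compressor.h holds the per-prefix decode tables, and bit.h ties coding, decoding, and the TBitBuffer bit stream together. For orientation, a representative structure description, taken from compproto_ut.cpp in the diff below (indentation is cosmetic; the parser reads whitespace-separated tokens):

    repeated data id 0
        scalar clicks id 0 default const 0
        scalar shows id 1 default const 0
        repeated regClicks id 2
            scalar clicks id 0 default const 0
            scalar shows id 1 default const 0
        end
        scalar extra id 31 default const 0
    end
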
diff --git a/library/cpp/compproto/bit.h b/library/cpp/compproto/bit.h new file mode 100644 index 00000000000..6a421b65f70 --- /dev/null +++ b/library/cpp/compproto/bit.h @@ -0,0 +1,348 @@ +#pragma once + +#include <util/generic/array_ref.h> +#include <util/generic/vector.h> +#include <util/stream/output.h> +#include <util/stream/input.h> + +#include "huff.h" +#include "compressor.h" +#include "metainfo.h" + +namespace NCompProto { + struct TBitBuffer { + TVector<ui8> Out; + ui64 Position; + ui64 Size; + ui8 Counter; + TBitBuffer() { + Clear(); + } + + static ui64 Read(const ui8* out, ui64 position, size_t size) { + const ui8* dst = out + (size_t(position >> 3)); + ui64 outCode = *reinterpret_cast<const ui64*>(dst); + ui8 shift = position & 7; + return ((1ULL << size) - 1) & (outCode >> shift); + } + + ui64 Read(ui64 position, size_t size) { + return Read(&Out[0], position, size); + } + + void Code(ui64 value, size_t size) { + if (++Counter == 0) { + Junk(257 * 64); + } + Position = Code(value, size, Position); + } + ui64 Code(ui64 value, size_t size, ui64 position) { + ui8* dst = &Out[size_t(position >> 3)]; + ui64& outCode = *(ui64*)dst; + ui8 shift = position & 7; + ui64 mask = ((1ULL << size) - 1) << shift; + outCode = ((value << shift) & mask) | (outCode & ~mask); + return position + size; + } + void Junk(size_t junk = 1024) { + size_t need = size_t(Position >> 3); + if (Out.size() * 8 < Position + junk) { + Out.resize(((need + junk) * 3) / 2); + Size = Out.size(); + } + } + void Insert(ui64 value, ui64 position, size_t size) { + ui64 newVal = Read(position, 56); + position = Code(value, size, position); + value = newVal; + + while (position < Position + 64) { + newVal = Read(position + 56 - size, 56); + position = Code(value, 56, position); + value = newVal; + } + + Position += size; + } + + size_t ByteLength() const { + return (Position + 7) / 8; + } + + void ResetPosition() { + Position = 0; + Size = 0; + } + + void Clear() { + Counter = 0; + Position = 0; + Size = 0; + Out.clear(); + Junk(); + } + + TArrayRef<const char> AsDataRegion() const { + return TArrayRef<const char>(reinterpret_cast<const char*>(Out.data()), ByteLength()); + } + }; + + struct THuff { + TCoder Coder; + ui64 Position; + THuff() { + Position = (ui64)(-1); + } + void Load(IInputStream& stream) { + TString name; + Coder.InitDefault(); + Coder.Entries.clear(); + while (1) { + stream >> name; + if (name == "end") + return; + else if (name == "entry") { + TCoderEntry entry; + ui32 value; + stream >> value; + entry.MinValue = value; + stream >> value; + entry.Prefix = value; + stream >> value; + entry.PrefixBits = value; + stream >> value; + entry.AllBits = value; + Coder.Entries.push_back(entry); + } + } + } + + void Save(IOutputStream& stream, TString offset) { + TString step = " "; + for (size_t i = 0; i < Coder.Entries.size(); ++i) { + stream << offset << step << "entry "; + stream << (ui32)Coder.Entries[i].MinValue << " "; + stream << (ui32)Coder.Entries[i].Prefix << " "; + stream << (ui32)Coder.Entries[i].PrefixBits << " "; + stream << (ui32)Coder.Entries[i].AllBits << " "; + stream << Endl; + } + stream << offset << "end" << Endl; + } + + void BeginElement(TBitBuffer& out) { + Position = out.Position; + } + void Add(ui32 value, TBitBuffer& out) { + size_t val = 0; + ui64 code = Coder.Code(value, val); + out.Code(code, val); + } + void AddDelayed(ui32 value, TBitBuffer& out) { + size_t val = 0; + ui64 code = Coder.Code(value, val); + if (Position == (ui64)(-1)) { + ythrow yexception() << "Position == 
(ui64)(-1)"; + } + out.Insert(code, Position, val); + out.Junk(); + } + }; + + struct THist { + TAccum Accum; + THist() { + } + + void Load(IInputStream& stream) { + TString name; + while (1) { + stream >> name; + if (name == "end") + return; + } + } + // TODO: why not const TString& ??? + void Save(IOutputStream& /*stream*/, TString /*offset*/) { + } + + void Add(ui32 value, TEmpty& /*empty*/) { + Accum.Add(value); + } + void AddDelayed(ui32 value, TEmpty& /*empty*/) { + Accum.Add(value); + } + void BeginElement(TEmpty& /*empty*/) { + } + }; + + struct THistToHuff { + static THistToHuff Instance() { + return THistToHuff(); + } + TEmpty Build() const { + return TEmpty(); + } + void Build(THuff& info, const THist& hist) const { + Analyze(hist.Accum, info.Coder.Entries); + info.Coder.Normalize(); + } + }; + + struct IDecompressor { + // sequentially decompresses whole structure according to metainfo, starts at position offset + virtual void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) = 0; + // decompresses one record of outer repeated structure starting at position offset + virtual void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) = 0; + virtual ~IDecompressor() = default; + }; + + template <class X> + struct TMetaIterator: public IDecompressor { + X Self; + TMetaIterator() { + Self.Parent = this; + } + + private: + inline void DecompressSingle(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) { + Self.BeginElement(repeatedIndex); + ui32 mask = table->Mask.Decompress(codes, offset); + size_t index = 0; + ui32 scalarMask = table->ScalarMask; + while (mask || scalarMask) { + if (mask & 1) { + if (scalarMask & 1) { + ui32 val = table->Scalar[index].Decompress(codes, offset); + Self.SetScalar(index, val); + } else { + Self.GetDecompressor(index).Decompress(table->Repeated[index].Get(), codes, offset); + } + } else if ((scalarMask & 1) && table->Default[index].Type == TScalarDefaultValue::Fixed) { + Self.SetScalar(index, table->Default[index].Value); + } + scalarMask = scalarMask >> 1; + mask = mask >> 1; + ++index; + } + Self.EndElement(); + } + + inline void DecompressSingleScalarsOnly(ui32 repeatedIndex, const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) { + Self.BeginElement(repeatedIndex); + ui32 mask = table->Mask.Decompress(codes, offset); + ui32 scalarMask = table->ScalarMask; + size_t index = 0; + while (scalarMask) { + if (mask & 1) { + ui32 val = table->Scalar[index].Decompress(codes, offset); + Self.SetScalar(index, val); + } else if (table->Default[index].Type == TScalarDefaultValue::Fixed) { + Self.SetScalar(index, table->Default[index].Value); + } + mask = mask >> 1; + scalarMask = scalarMask >> 1; + ++index; + } + Self.EndElement(); + } + + public: + void Decompress(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) override { + ui64 locOffset = offset; + ui32 count = table->Count.Decompress(codes, locOffset); + ui32 repeatedIndex = (ui32)(-1); + Self.BeginSelf(count, table->Id); + if (table->RepeatedMask) { + for (ui32 i = 0; i < count; ++i) { + repeatedIndex += table->Index.Decompress(codes, locOffset); + DecompressSingle(repeatedIndex, table, codes, locOffset); + } + } else { + for (ui32 i = 0; i < count; ++i) { + repeatedIndex += table->Index.Decompress(codes, locOffset); + DecompressSingleScalarsOnly(repeatedIndex, table, codes, locOffset); + } + } + offset = locOffset; + Self.EndSelf(); + } + + // XXX: iterator needed? 
+ // + // Following two functions serves the purpose of decompressing outer repeated-structure(such structure is mandatory now). + // They can work for inner repeated structures too, if you supply correct table, codes and offset parameters. + void DecompressOne(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset, ui32 prevIndex = -1) override { + table->Index.Decompress(codes, offset); + DecompressSingle(prevIndex, table, codes, offset); + } + + ui32 DecompressCount(const TMetaInfo<TTable>* table, const ui8* codes, ui64& offset) { + return table->Count.Decompress(codes, offset); + } + }; + + template <class X> + struct TParentHold { + TMetaIterator<X>* Parent; + TParentHold() + : Parent(nullptr) + { + } + }; + + struct TEmptyDecompressor: public TParentHold<TEmptyDecompressor> { + void BeginSelf(ui32 /*count*/, ui32 /*id*/) { + } + void EndSelf() { + } + void BeginElement(ui32 /*element*/) { + } + void EndElement() { + } + void SetScalar(size_t /*index*/, ui32 /*val*/) { + } + TMetaIterator<TEmptyDecompressor>& GetDecompressor(size_t index) { + Y_UNUSED(index); + return *Parent; + } + }; + + inline TMetaIterator<TEmptyDecompressor>& GetEmptyDecompressor() { + static TMetaIterator<TEmptyDecompressor> empty; + return empty; + } + + struct THuffToTable { + static THuffToTable Instance() { + return THuffToTable(); + } + void Build(TTable& table, const THuff& huff) const { + // can happen if a field was never serialized across whole structure + if (huff.Coder.Entries.empty()) + return; + for (ui32 i = 0; i < 64; ++i) { + const TCoderEntry& entry = huff.Coder.GetEntry(i, table.Id[i]); + table.CodeBase[i] = entry.MinValue; + table.PrefLength[i] = entry.PrefixBits; + table.CodeMask[i] = (1ULL << (entry.AllBits - entry.PrefixBits)) - 1ULL; + table.Length[i] = entry.AllBits; + } + } + }; + + struct THuffToTableWithDecompressor: private THuffToTable { + TSimpleSharedPtr<IDecompressor> Decompressor; + THuffToTableWithDecompressor(TSimpleSharedPtr<IDecompressor> decompressor) + : Decompressor(decompressor) + { + } + TSimpleSharedPtr<IDecompressor> Build() const { + return Decompressor; + } + void Build(TTable& table, const THuff& huff) const { + THuffToTable::Build(table, huff); + } + }; + +} diff --git a/library/cpp/compproto/compproto_ut.cpp b/library/cpp/compproto/compproto_ut.cpp new file mode 100644 index 00000000000..9393be967a3 --- /dev/null +++ b/library/cpp/compproto/compproto_ut.cpp @@ -0,0 +1,543 @@ +#include "huff.h" +#include "metainfo.h" +#include "bit.h" + +#include <util/generic/vector.h> +#include <util/generic/map.h> +#include <util/system/protect.h> + +#include <library/cpp/testing/unittest/registar.h> + +static ui64 gSeed = 42; + +static void FlushPseudoRandom() { + gSeed = 42; +} + +static ui32 PseudoRandom(ui32 max) { + // stupid and non-threadsafe, but very predictable chaos generator + gSeed += 1; + gSeed *= 419; + gSeed = gSeed ^ (ui64(max) << 17); + return gSeed % max; +}; + +enum ECompMode { + CM_SINGLEPASS, + CM_TWOPASS +}; + +struct TTestParams { + size_t DataSize; + ui32 ValueArraySize; +}; + +template <typename X> +void TestSaveLoadMeta(NCompProto::TMetaInfo<X>& src) { + TStringStream ss; + src.Save(ss); + TString data = ss.Str(); + NCompProto::TMetaInfo<X> loadedMeta(data); + ss = TStringStream(); + loadedMeta.Save(ss); + UNIT_ASSERT_EQUAL(ss.Str(), data); +} + +template <typename TDecompressor, template <typename, typename> class TSerialize> +void TestWithParams(const TString& metainfo, const ECompMode mode, const TTestParams& params) { + using namespace 
NCompProto; + FlushPseudoRandom(); + + TStringInput stream(metainfo); + + THolder<TMetaInfo<THuff>> meta; + if (mode == CM_TWOPASS) { + TMetaInfo<THist> hist(stream); + TEmpty empty; + TSerialize<THist, TEmpty>::Serialize(hist, empty, params); + meta.Reset(new TMetaInfo<THuff>(hist, THistToHuff::Instance())); + } else { + meta.Reset(new TMetaInfo<THuff>(stream)); + } + TestSaveLoadMeta(*meta.Get()); + + TBitBuffer buffer; + TSerialize<THuff, TBitBuffer>::Serialize(*meta, buffer, params); + + ui64 codedSize = buffer.Position; + + TMetaInfo<TTable> decompressor(*meta, THuffToTable::Instance()); + + // verify that no memory read beyond buffer occurs + const size_t byteSize = buffer.ByteLength(); + const size_t PAGESIZEX = 4096; + const size_t busyPages = (byteSize + (PAGESIZEX - 1)) / PAGESIZEX; + const size_t allPages = busyPages + 1; + const size_t allocSize = (allPages + 1) * PAGESIZEX; + TVector<ui8> readBuffer(allocSize); + ui8* start = &readBuffer[0]; + ui8* pageStart = reinterpret_cast<ui8*>((size_t(start) + PAGESIZEX) & ~(PAGESIZEX - 1)); + // XX DATA DATA DATA DATA PROT + // | | | | | pages + // calculate dataStart so that data ends exactly at the page end + ui8* dataStart = pageStart + busyPages * PAGESIZEX - byteSize; + ui8* dataEnd = pageStart + busyPages * PAGESIZEX; + ProtectMemory(dataEnd, PAGESIZEX, PM_NONE); + // memory copying should be performed without any problems + memcpy(dataStart, buffer.Out.data(), byteSize); + + ui64 position = 0; + TMetaIterator<TDecompressor> instance; + // we should not read beyond dataEnd here + instance.Decompress(&decompressor, dataStart, position); + const ui64 decodedSize = position; + UNIT_ASSERT_EQUAL(codedSize, decodedSize); + // unprotect memory + ProtectMemory(dataEnd, PAGESIZEX, PM_READ | PM_WRITE | PM_EXEC); +} + +template <typename TDecompressor, template <typename, typename> class TSerialize> +void Test(const TString& metainfo, const ECompMode mode) { + for (size_t ds = 3; ds < 42; ds += (3 + PseudoRandom(5))) { + for (size_t vas = 5; vas < 42; vas += (4 + PseudoRandom(10))) { + TTestParams params; + params.DataSize = ds; + params.ValueArraySize = vas; + TestWithParams<TDecompressor, TSerialize>(metainfo, mode, params); + } + } +} + +Y_UNIT_TEST_SUITE(CompProtoTestBasic) { + using namespace NCompProto; + + const TString metainfo = + "\n\ + repeated data id 0\n\ + scalar clicks id 0 default const 0\n\ + scalar shows id 1 default const 0\n\ + repeated regClicks id 2\n\ + scalar clicks id 0 default const 0\n\ + scalar shows id 1 default const 0\n\ + end\n\ + scalar extra id 31 default const 0\n\ + end\n"; + + struct TRegInfo { + ui32 Clicks; + ui32 Shows; + }; + + struct TData { + ui32 Clicks; + ui32 Shows; + ui32 Extra; + TMap<ui32, TRegInfo> RegClicks; + }; + + TVector<TData> data; + + template <class TMeta, class TFunctor> + struct TSerialize { + static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) { + FlushPseudoRandom(); + meta.BeginSelf(functor); + data.clear(); + data.resize(params.DataSize); + for (ui32 i = 0; i < params.DataSize; ++i) { + meta.BeginElement(i, functor); + + data[i].Clicks = PseudoRandom(16) + 100; + data[i].Shows = PseudoRandom(500) * PseudoRandom(16); + data[i].Extra = PseudoRandom(500) + (1UL << 31); // test also saving of big values + meta.SetScalar(0, data[i].Clicks, functor); + meta.SetScalar(1, data[i].Shows, functor); + + TMetaInfo<TMeta>& regClicks = meta.BeginRepeated(2, functor); + for (ui32 j = 0; j < PseudoRandom(200); j += 1 + PseudoRandom(10)) { + 
regClicks.BeginElement(j, functor); + TRegInfo& r = data[i].RegClicks[j]; + + r.Clicks = PseudoRandom(2); + r.Shows = PseudoRandom(800) * PseudoRandom(8) + 56; + regClicks.SetScalar(0, r.Clicks, functor); + regClicks.SetScalar(1, r.Shows, functor); + + regClicks.EndElement(functor); + } + regClicks.EndRepeated(functor); + meta.SetScalar(31, data[i].Extra, functor); + meta.EndElement(functor); + } + meta.EndRepeated(functor); + } + }; + + struct TMultiDecompressor: public TParentHold<TMultiDecompressor> { + struct TRegClicks: public TParentHold<TRegClicks> { + const TData* Data; + const TRegInfo* Elem; + TRegClicks() + : Data(nullptr) + , Elem(nullptr) + { + } + void BeginSelf(ui32 /*count*/, ui32 /*id*/) { + } + void EndSelf() { + } + void BeginElement(ui32 element) { + TMap<ui32, TRegInfo>::const_iterator it = Data->RegClicks.find(element); + if (it == Data->RegClicks.end()) { + UNIT_ASSERT(0); + } + Elem = &it->second; + } + void EndElement() { + } + void SetScalar(size_t index, ui32 val) { + if (index == 0) + UNIT_ASSERT_EQUAL(val, Elem->Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, Elem->Shows); + } + IDecompressor& GetDecompressor(size_t) { + UNIT_ASSERT(0); + return GetEmptyDecompressor(); + } + }; + + const TData* Elem; + TMetaIterator<TRegClicks> RegClicks; + void BeginSelf(ui32 /*count*/, ui32 /*id*/) { + } + void EndSelf() { + } + void BeginElement(ui32 element) { + UNIT_ASSERT(element < data.size()); + Elem = &data[element]; + } + void EndElement() { + } + void SetScalar(size_t index, ui32 val) { + if (index == 0) + UNIT_ASSERT_EQUAL(val, Elem->Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, Elem->Shows); + if (index == 31) + UNIT_ASSERT_EQUAL(val, Elem->Extra); + } + IDecompressor& GetDecompressor(size_t index) { + if (index == 2) { + RegClicks.Self.Data = Elem; + return RegClicks; + } + UNIT_ASSERT(0); + return GetEmptyDecompressor(); + } + TMultiDecompressor() + : Elem(nullptr) + { + } + }; + + struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> { + enum EState { + Startstop, + OutDataElem, + InDataElem, + InRegClicks, + }; + EState State; + + ui32 DataInd; + TMap<ui32, TRegInfo>::iterator RegIter; + + TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) { + Y_UNUSED(index); + return *Parent; + } + + TVerifyingDecompressor() + : State(Startstop) + , DataInd(0) + { + } + void BeginSelf(ui32 /*count*/, ui32 id) { + switch (State) { + case Startstop: + UNIT_ASSERT_EQUAL(id, 0); + State = OutDataElem; + break; + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + UNIT_ASSERT_EQUAL(id, 2); + State = InRegClicks; + RegIter = data[DataInd].RegClicks.begin(); + break; + case InRegClicks: + UNIT_ASSERT(0); + default: + UNIT_ASSERT(0); + } + } + void EndSelf() { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + State = Startstop; + break; + case InDataElem: + UNIT_ASSERT(0); + case InRegClicks: + UNIT_ASSERT_EQUAL(RegIter, data[DataInd].RegClicks.end()); + State = InDataElem; + break; + default: + UNIT_ASSERT(0); + } + } + void BeginElement(ui32 element) { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + UNIT_ASSERT(element < data.size()); + State = InDataElem; + break; + case InDataElem: + UNIT_ASSERT(0); + case InRegClicks: + UNIT_ASSERT_EQUAL(element, RegIter->first); + break; + } + } + void EndElement() { + switch (State) { + case Startstop: + UNIT_ASSERT(0); + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + State = OutDataElem; + ++DataInd; + break; + case 
InRegClicks: + ++RegIter; + break; + } + } + + void SetScalar(size_t index, ui32 val) { + switch (State) { + case OutDataElem: + UNIT_ASSERT(0); + case InDataElem: + if (index == 0) + UNIT_ASSERT_EQUAL(val, data[DataInd].Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, data[DataInd].Shows); + if (index == 31) + UNIT_ASSERT_EQUAL(val, data[DataInd].Extra); + break; + case InRegClicks: + if (index == 0) + UNIT_ASSERT_EQUAL(val, RegIter->second.Clicks); + if (index == 1) + UNIT_ASSERT_EQUAL(val, RegIter->second.Shows); + break; + default: + UNIT_ASSERT(0); + } + } + }; + + Y_UNIT_TEST(VerifyDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } + + Y_UNIT_TEST(VerifyDecompressionMulti) { + Test<TMultiDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompressionMulti) { + Test<TMultiDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } +} + +Y_UNIT_TEST_SUITE(CompProtoTestExtended) { + using namespace NCompProto; + const TString metainfo = + "\n\ + repeated data id 0\n\ + repeated second id 3\n\ + scalar inner2 id 0 default const 0\n\ + end\n\ + repeated first id 2\n\ + scalar inner id 0 default const 0\n\ + end\n\ + end\n"; + TVector<std::pair<TVector<ui32>, TVector<ui32>>> data; + + template <class TMeta, class TFunctor> + struct TSerialize { + static void Serialize(TMetaInfo<TMeta>& meta, TFunctor& functor, const TTestParams& params) { + FlushPseudoRandom(); + meta.BeginSelf(functor); + data.clear(); + data.resize(params.DataSize); + for (size_t i = 0; i < params.DataSize; ++i) { + meta.BeginElement(i, functor); + TMetaInfo<TMeta>& first = meta.BeginRepeated(2, functor); + data[i].first.resize(params.ValueArraySize); + for (ui32 j = 0; j < params.ValueArraySize; j++) { + first.BeginElement(j, functor); + + ui32 val = PseudoRandom(42 * 42 * 42); + first.SetScalar(0, val, functor); + data[i].first[j] = val; + + first.EndElement(functor); + } + first.EndRepeated(functor); + + TMetaInfo<TMeta>& second = meta.BeginRepeated(3, functor); + data[i].second.resize(params.ValueArraySize); + for (ui32 j = 0; j < params.ValueArraySize; j++) { + second.BeginElement(j, functor); + + ui32 val = PseudoRandom(42 * 42 * 42); + second.SetScalar(0, val, functor); + data[i].second[j] = val; + + second.EndElement(functor); + } + second.EndRepeated(functor); + meta.EndElement(functor); + } + meta.EndRepeated(functor); + } + }; + + struct TVerifyingDecompressor: public TParentHold<TVerifyingDecompressor> { + enum EState { + Startstop, + OutDataElem, + InDataElemBeforeSecond, + InDataElemSecond, + InFirst, + InSecond, + }; + EState State; + + ui32 DataInd; + ui32 ArrayInd; + + TVerifyingDecompressor() + : State(Startstop) + , DataInd(0) + , ArrayInd(0) + { + } + + TMetaIterator<TVerifyingDecompressor>& GetDecompressor(size_t index) { + Y_UNUSED(index); + return *Parent; + } + + void BeginSelf(ui32 /*count*/, ui32 id) { + switch (State) { + case Startstop: + UNIT_ASSERT_EQUAL(id, 0); + State = OutDataElem; + break; + case InDataElemBeforeSecond: + UNIT_ASSERT_EQUAL(id, 2); + State = InFirst; + ArrayInd = 0; + break; + case InDataElemSecond: + UNIT_ASSERT_EQUAL(id, 3); + State = InSecond; + ArrayInd = 0; + break; + default: + UNIT_ASSERT(0); + } + } + void EndSelf() { + switch (State) { + case OutDataElem: + State = Startstop; + break; + case InFirst: + State = InDataElemSecond; + break; + case InSecond: + State = InDataElemSecond; + 
break; + default: + UNIT_ASSERT(0); + } + } + void BeginElement(ui32 element) { + switch (State) { + case OutDataElem: + UNIT_ASSERT(element < data.size()); + State = InDataElemBeforeSecond; + break; + case InFirst: + UNIT_ASSERT(element < data[DataInd].first.size()); + break; + case InSecond: + UNIT_ASSERT(element < data[DataInd].second.size()); + break; + default: + Cerr << (ui32)State << Endl; + UNIT_ASSERT(0); + } + } + void EndElement() { + switch (State) { + case InFirst: + case InSecond: + ++ArrayInd; + break; + case InDataElemSecond: + ++DataInd; + State = OutDataElem; + break; + default: + Cerr << (ui32)State << Endl; + UNIT_ASSERT(0); + } + } + + void SetScalar(size_t index, ui32 val) { + UNIT_ASSERT_EQUAL(index, 0); + switch (State) { + case InFirst: + UNIT_ASSERT_EQUAL(val, data[DataInd].first[ArrayInd]); + break; + case InSecond: + UNIT_ASSERT_EQUAL(val, data[DataInd].second[ArrayInd]); + break; + default: + UNIT_ASSERT(0); + } + } + }; + Y_UNIT_TEST(VerifyDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_SINGLEPASS); + } + + Y_UNIT_TEST(VerifyHistDecompression) { + Test<TVerifyingDecompressor, TSerialize>(metainfo, CM_TWOPASS); + } +} diff --git a/library/cpp/compproto/compressor.h b/library/cpp/compproto/compressor.h new file mode 100644 index 00000000000..14b335e13ca --- /dev/null +++ b/library/cpp/compproto/compressor.h @@ -0,0 +1,74 @@ +#pragma once + +#include <util/system/defaults.h> + +namespace NCompProto { + struct TEmpty { + }; + + struct TTable { + TTable() { + for (size_t i = 0; i < 64; ++i) { + CodeBase[i] = 0; + CodeMask[i] = 0; + Length[i] = 0; + PrefLength[i] = 0; + Id[i] = 0; + } + } + ui32 CodeBase[64]; + ui32 CodeMask[64]; + ui8 Length[64]; + ui8 PrefLength[64]; + ui8 Id[64]; + enum { + PAGE_BOUND = 4096, +#ifdef WITH_VALGRIND + SAFE_MODE = 1, +#else +#if defined(__has_feature) +#if __has_feature(address_sanitizer) + SAFE_MODE = 1, +#else + SAFE_MODE = 0, +#endif +#else + SAFE_MODE = 0, +#endif +#endif + }; + ui32 inline Decompress(const ui8* codes, ui64& offset) const { + codes += (offset >> 3); + size_t pageOff = size_t(codes) % PAGE_BOUND; + size_t readOff = offset & 7; + if (pageOff > PAGE_BOUND - 8 || SAFE_MODE) { + size_t off = 8; + ui64 res = codes[0]; + ++codes; + ui64 indexCur = ((res + 0x0000) >> readOff) & 63; + ui64 indexAlt = ((res + 0xff00) >> readOff) & 63; + if (Id[indexCur] != Id[indexAlt]) { + res += (ui64(codes[0]) << off); + ++codes; + off += 8; + indexCur = (res >> readOff) & 63; + } + ui64 index = indexCur; + ui64 length = Length[index]; + while (off < readOff + length) { + res += (ui64(codes[0]) << off); + ++codes; + off += 8; + } + offset += length; + ui64 code = res >> readOff; + return (((ui32)(code >> PrefLength[index])) & CodeMask[index]) + CodeBase[index]; + } + ui64 code = ((const ui64*)(codes))[0] >> readOff; + ui64 index = code & 63; + offset += Length[index]; + return (((ui32)(code >> PrefLength[index])) & CodeMask[index]) + CodeBase[index]; + } + }; + +} diff --git a/library/cpp/compproto/huff.h b/library/cpp/compproto/huff.h new file mode 100644 index 00000000000..fa5c139189d --- /dev/null +++ b/library/cpp/compproto/huff.h @@ -0,0 +1,402 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/generic/yexception.h> +#include <util/generic/ptr.h> +#include <util/generic/vector.h> +#include <util/generic/algorithm.h> +#include <utility> + +#include <queue> + +#include "compressor.h" + +namespace NCompProto { + template <size_t CacheSize, typename TEntry> + struct TCache { + ui32 
CacheKey[CacheSize]; + TEntry CacheVal[CacheSize]; + size_t Hits; + size_t Misses; + ui32 Hash(ui32 key) { + return key % CacheSize; + } + TCache() { + Hits = 0; + Misses = 0; + Clear(); + } + void Clear() { + for (size_t i = 0; i < CacheSize; ++i) { + ui32 j = 0; + for (; Hash(j) == i; ++j) + ; + CacheKey[i] = j; + } + } + }; + + struct TCode { + i64 Probability; + ui32 Start; + ui32 Bits; + ui32 Prefix; + ui32 PrefLength; + TCode(i64 probability = 0, ui32 start = 0, ui32 bits = 0) + : Probability(probability) + , Start(start) + , Bits(bits) + { + } + + bool operator<(const TCode& code) const { + return Probability < code.Probability; + } + + bool operator>(const TCode& code) const { + return Probability > code.Probability; + } + }; + + struct TAccum { + struct TTable { + TAutoPtr<TTable> Tables[16]; + i64 Counts[16]; + TTable(const TTable& other) { + for (size_t i = 0; i < 16; ++i) { + Counts[i] = other.Counts[i]; + if (other.Tables[i].Get()) { + Tables[i].Reset(new TTable(*other.Tables[i].Get())); + } + } + } + TTable() { + for (auto& count : Counts) + count = 0; + } + + i64 GetCellCount(size_t i) { + i64 count = Counts[i]; + if (Tables[i].Get()) { + for (size_t j = 0; j < 16; ++j) { + count += Tables[i]->GetCellCount(j); + } + } + return count; + } + + i64 GetCount() { + i64 count = 0; + for (size_t j = 0; j < 16; ++j) { + count += GetCellCount(j); + } + return count; + } + + void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int depth, int termDepth, ui32 code, i64 cnt) { + if (depth == termDepth) { + for (size_t i = 0; i < 16; ++i) { + i64 iCount = GetCellCount(i); + if (Tables[i].Get()) { + Counts[i] = iCount; + Tables[i].Reset(nullptr); + } + + if (iCount > cnt || (termDepth == 0 && iCount > 0)) { + std::pair<i64, TCode> codep; + codep.first = iCount; + codep.second.Probability = iCount; + codep.second.Start = code + (i << (28 - depth)); + codep.second.Bits = 28 - depth; + codes.push_back(codep); + Counts[i] = 0; + } + } + } + for (size_t i = 0; i < 16; ++i) { + if (Tables[i].Get()) { + Tables[i]->GenerateFreqs(codes, depth + 4, termDepth, code + (i << (28 - depth)), cnt); + } + } + } + }; + + TTable Root; + int TableCount; + i64 Total; + ui64 Max; + + TAccum() { + TableCount = 0; + Total = 0; + Max = 0; + } + + void GenerateFreqs(TVector<std::pair<i64, TCode>>& codes, int mul) const { + TTable root(Root); + + for (int i = 28; i > 0; i -= 4) { + root.GenerateFreqs(codes, 0, i, 0, Total / mul); + } + + i64 iCount = root.GetCount(); + if (iCount == 0) + return; + std::pair<i64, TCode> codep; + codep.first = iCount; + codep.second.Probability = iCount; + codep.second.Start = 0; + ui32 bits = 0; + while (1) { + if ((1ULL << bits) > Max) + break; + ++bits; + } + codep.second.Bits = bits; + codes.push_back(codep); + } + + TCache<256, i64*> Cache; + + void AddMap(ui32 value, i64 weight = 1) { + ui32 index = Cache.Hash(value); + if (Cache.CacheKey[index] == value) { + Cache.CacheVal[index][0] += weight; + return; + } + TTable* root = &Root; + for (size_t i = 0; i < 15; ++i) { + ui32 index2 = (value >> (28 - i * 4)) & 0xf; + if (!root->Tables[index2].Get()) { + if (TableCount < 1024) { + ++TableCount; + root->Tables[index2].Reset(new TTable); + } else { + Cache.CacheKey[index2] = value; + Cache.CacheVal[index2] = &root->Counts[index2]; + root->Counts[index2] += weight; + return; + } + } + root = root->Tables[index2].Get(); + } + + Cache.CacheKey[index] = value; + Cache.CacheVal[index] = &root->Counts[value & 0xf]; + root->Counts[value & 0xf] += weight; + } + + void Add(ui32 value, 
i64 weight = 1) { + Max = ::Max(Max, (ui64)value); + Total += weight; + AddMap(value, weight); + }; + }; + + struct THuffNode { + i64 Weight; + i64 Priority; + THuffNode* Nodes[2]; + TCode* Code; + THuffNode(i64 weight, i64 priority, TCode* code) + : Weight(weight) + , Priority(priority) + , Code(code) + { + Nodes[0] = nullptr; + Nodes[1] = nullptr; + } + + void BuildPrefixes(ui32 depth, ui32 prefix) { + if (Code) { + Code->Prefix = prefix; + Code->PrefLength = depth; + return; + } + Nodes[0]->BuildPrefixes(depth + 1, prefix + (0UL << depth)); + Nodes[1]->BuildPrefixes(depth + 1, prefix + (1UL << depth)); + } + + i64 Iterate(size_t depth) const { + if (Code) { + return (depth + Code->Bits) * Code->Probability; + } + return Nodes[0]->Iterate(depth + 1) + Nodes[1]->Iterate(depth + 1); + } + + size_t Depth() const { + if (Code) { + return 0; + } + return Max(Nodes[0]->Depth(), Nodes[1]->Depth()) + 1; + } + }; + + struct THLess { + bool operator()(const THuffNode* a, const THuffNode* b) { + if (a->Weight > b->Weight) + return 1; + if (a->Weight == b->Weight && a->Priority > b->Priority) + return 1; + return 0; + } + }; + + inline i64 BuildHuff(TVector<TCode>& codes) { + TVector<TSimpleSharedPtr<THuffNode>> hold; + std::priority_queue<THuffNode*, TVector<THuffNode*>, THLess> nodes; + i64 ret = 0; + + int priority = 0; + for (size_t i = 0; i < codes.size(); ++i) { + TSimpleSharedPtr<THuffNode> node(new THuffNode(codes[i].Probability, priority++, &codes[i])); + hold.push_back(node); + nodes.push(node.Get()); + } + + while (nodes.size() > 1) { + THuffNode* nodea = nodes.top(); + nodes.pop(); + THuffNode* nodeb = nodes.top(); + nodes.pop(); + TSimpleSharedPtr<THuffNode> node(new THuffNode(nodea->Weight + nodeb->Weight, priority++, nullptr)); + node->Nodes[0] = nodea; + node->Nodes[1] = nodeb; + hold.push_back(node); + nodes.push(node.Get()); + } + + if (nodes.size()) { + THuffNode* node = nodes.top(); + node->BuildPrefixes(0, 0); + ret = node->Iterate(0); + } + + return ret; + }; + + struct TCoderEntry { + ui32 MinValue; + ui16 Prefix; + ui8 PrefixBits; + ui8 AllBits; + + ui64 MaxValue() const { + return MinValue + (1ULL << (AllBits - PrefixBits)); + } + }; + + inline i64 Analyze(const TAccum& acc, TVector<TCoderEntry>& retCodes) { + i64 ret; + for (int k = 256; k > 0; --k) { + retCodes.clear(); + TVector<std::pair<i64, TCode>> pairs; + acc.GenerateFreqs(pairs, k); + TVector<TCode> codes; + for (size_t i = 0; i < pairs.size(); ++i) { + codes.push_back(pairs[i].second); + } + + StableSort(codes.begin(), codes.end(), std::greater<TCode>()); + + ret = BuildHuff(codes); + bool valid = true; + for (size_t i = 0; i < codes.size(); ++i) { + TCoderEntry code; + code.MinValue = codes[i].Start; + code.Prefix = codes[i].Prefix; + code.PrefixBits = codes[i].PrefLength; + if (code.PrefixBits > 6) + valid = false; + code.AllBits = code.PrefixBits + codes[i].Bits; + retCodes.push_back(code); + } + if (valid) + return ret; + } + + return ret; + } + + struct TComparer { + bool operator()(const TCoderEntry& e0, const TCoderEntry& e1) const { + return e0.AllBits < e1.AllBits; + } + }; + + struct TCoder { + TVector<TCoderEntry> Entries; + void Normalize() { + TComparer comp; + StableSort(Entries.begin(), Entries.end(), comp); + } + TCoder() { + InitDefault(); + } + void InitDefault() { + ui64 cum = 0; + Cache.Clear(); + Entries.clear(); + ui16 b = 1; + for (ui16 i = 0; i < 40; ++i) { + ui16 bits = Min(b, (ui16)(32)); + b = (b * 16) / 10 + 1; + if (b > 32) + b = 32; + TCoderEntry entry; + entry.PrefixBits = i + 1; + 
entry.AllBits = entry.PrefixBits + bits; + entry.MinValue = (ui32)Min(cum, (ui64)(ui32)(-1)); + cum += (1ULL << bits); + entry.Prefix = ((1UL << i) - 1); + Entries.push_back(entry); + if (cum > (ui32)(-1)) { + return; + } + }; + } + + TCache<1024, TCoderEntry> Cache; + + ui64 RealCode(ui32 value, const TCoderEntry& entry, size_t& length) { + length = entry.AllBits; + return (ui64(value - entry.MinValue) << entry.PrefixBits) + entry.Prefix; + } + + bool Empty() const { + return Entries.empty(); + } + const TCoderEntry& GetEntry(ui32 code, ui8& id) const { + for (size_t i = 0; i < Entries.size(); ++i) { + const TCoderEntry& entry = Entries[i]; + ui32 prefMask = (1UL << entry.PrefixBits) - 1UL; + if (entry.Prefix == (code & prefMask)) { + id = ui8(i); + return entry; + } + } + ythrow yexception() << "bad entry"; + return Entries[0]; + } + + ui64 Code(ui32 entry, size_t& length) { + ui32 index = Cache.Hash(entry); + if (Cache.CacheKey[index] == entry) { + ++Cache.Hits; + return RealCode(entry, Cache.CacheVal[index], length); + } + ++Cache.Misses; + for (size_t i = 0; i < Entries.size(); ++i) { + if (entry >= Entries[i].MinValue && entry < Entries[i].MaxValue()) { + Cache.CacheKey[index] = entry; + Cache.CacheVal[index] = Entries[i]; + return RealCode(entry, Cache.CacheVal[index], length); + } + } + + ythrow yexception() << "bad huff tree"; + return 0; + } + }; + +} diff --git a/library/cpp/compproto/lib.cpp b/library/cpp/compproto/lib.cpp new file mode 100644 index 00000000000..dd23e5aa11d --- /dev/null +++ b/library/cpp/compproto/lib.cpp @@ -0,0 +1,4 @@ +#include "bit.h" +#include "compressor.h" +#include "huff.h" +#include "metainfo.h" diff --git a/library/cpp/compproto/metainfo.h b/library/cpp/compproto/metainfo.h new file mode 100644 index 00000000000..6e68f86e12a --- /dev/null +++ b/library/cpp/compproto/metainfo.h @@ -0,0 +1,304 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/generic/yexception.h> +#include <util/generic/ptr.h> +#include <util/generic/refcount.h> +#include <util/stream/input.h> +#include <util/stream/str.h> + +#include "compressor.h" + +namespace NCompProto { + const size_t MAX_ELEMENTS = 32; + + struct TScalarDefaultValue { + TScalarDefaultValue() + : Type(None) + , Value(0) + { + } + enum EType { + None = 0, + Fixed = 1, + }; + EType Type; + ui32 Value; + }; + + template <class X> + struct TMetaInfo { + X Index; + X Count; + X Mask; + X Scalar[MAX_ELEMENTS]; + TAtomicSharedPtr<TMetaInfo> Repeated[MAX_ELEMENTS]; + TScalarDefaultValue Default[MAX_ELEMENTS]; + + ui32 ScalarMask; + ui32 RepeatedMask; + size_t Size; + TString Name; + TString ChildName[MAX_ELEMENTS]; + size_t Id; + TMetaInfo* Parent; + + struct TCheck { + ui32 Count; + ui32 Mask; + ui32 LastIndex; + ui32 Check(size_t id, size_t rule) { + ui32 mask = 1UL << id; + if (Mask & mask) { + ythrow yexception() << "Mask & mask"; + } + Mask |= mask; + Y_ENSURE(rule & mask, "!(rule & mask)"); + return mask; + } + void ClearCount() { + Count = 0; + } + void ClearMask() { + Mask = 0; + } + void ClearIndex() { + LastIndex = ui32(-1); + } + void ClearAll() { + ClearCount(); + ClearIndex(); + ClearMask(); + } + TCheck() { + ClearAll(); + } + }; + + TCheck Serializer; + + void SetDefaults(TMetaInfo* parent) { + ScalarMask = 0; + RepeatedMask = 0; + Size = 0; + Id = 0; + Parent = parent; + } + + template <class T, class TBuilder> + TMetaInfo(const TMetaInfo<T>& meta, const TBuilder& builder) { + SetDefaults(nullptr); + Name = meta.Name; + builder.Build(Index, meta.Index); + builder.Build(Count, 
meta.Count); + builder.Build(Mask, meta.Mask); + Size = meta.Size; + Id = meta.Id; + ScalarMask = meta.ScalarMask; + RepeatedMask = meta.RepeatedMask; + for (size_t i = 0; i < Size; ++i) { + ui32 index = (1UL << i); + if (ScalarMask & index) { + builder.Build(Scalar[i], meta.Scalar[i]); + ChildName[i] = meta.ChildName[i]; + Default[i] = meta.Default[i]; + } + if (RepeatedMask & index) { + THolder<TMetaInfo> info(new TMetaInfo(*meta.Repeated[i], builder)); + info->Parent = this; + Repeated[i].Reset(info.Release()); + } + } + } + + TMetaInfo(TMetaInfo* parent) { + SetDefaults(parent); + } + + template <class T> + TMetaInfo& BeginRepeated(size_t id, T& functor) { + Serializer.Check(id, RepeatedMask); + TMetaInfo& res = *Repeated[id].Get(); + res.Count.BeginElement(functor); + return res; + } + + template <class T> + void BeginSelf(T& functor) { + Serializer.ClearAll(); + Count.BeginElement(functor); + } + + template <class T> + void EndRepeated(T& functor) { + Count.AddDelayed(Serializer.Count, functor); + Serializer.ClearCount(); + Serializer.ClearIndex(); + Y_ENSURE(Serializer.Mask == 0, "Serializer.Mask != 0"); + } + + template <class T> + void BeginElement(ui32 index, T& functor) { + Y_ENSURE(!(index < Serializer.LastIndex + 1), + "Name: " << Name.Quote() << " error: index < Serializer.LastIndex + 1; " + << "index: " << index << " LastIndex: " << Serializer.LastIndex); + Index.Add(index - Serializer.LastIndex, functor); + Mask.BeginElement(functor); + Serializer.LastIndex = index; + ++Serializer.Count; + } + + template <class TFunctor> + void Iterate(TFunctor& functor) { + Cout << Name << " " + << "Index" << Endl; + functor.Process(Index); + Cout << "Count" << Endl; + functor.Process(Count); + Cout << "Mask" << Endl; + functor.Process(Mask); + + for (size_t i = 0; i < Size; ++i) { + ui32 index = (1UL << i); + if (ScalarMask & index) { + Cout << "scalar" + << " " << i << Endl; + functor.Process(Scalar[i]); + } + if (RepeatedMask & index) { + Cout << "repeated" + << " " << i << Endl; + Repeated[i]->Iterate(functor); + } + } + } + + template <class T> + void EndElement(T& functor) { + Mask.AddDelayed(Serializer.Mask, functor); + Serializer.ClearMask(); + } + + template <class T> + void SetScalar(size_t id, ui32 value, T& functor) { + if (Default[id].Type != TScalarDefaultValue::Fixed || value != Default[id].Value) { + Serializer.Check(id, ScalarMask); + Scalar[id].Add(value, functor); + } + } + + ui32 Check(size_t id) { + Y_ENSURE(id < MAX_ELEMENTS, "id >= MAX_ELEMENTS"); + + ui32 mask = 1UL << id; + if (ScalarMask & mask) { + ythrow yexception() << "ScalarMask & mask"; + } + if (RepeatedMask & mask) { + ythrow yexception() << "RepeatedMask & mask"; + } + Size = Max(id + 1, Size); + return mask; + } + + TMetaInfo(IInputStream& stream) { + SetDefaults(nullptr); + Load(stream); + } + + TMetaInfo(const TString& str) { + SetDefaults(nullptr); + TStringInput stream(str); + Load(stream); + } + + void Save(IOutputStream& stream, const TString& offset = TString()) { + stream << offset << "repeated " << Name << " id " << Id << Endl; + TString step = " "; + stream << step << offset << "index" << Endl; + Index.Save(stream, step + offset); + stream << step << offset << "count" << Endl; + Count.Save(stream, step + offset); + stream << step << offset << "mask" << Endl; + Mask.Save(stream, step + offset); + + for (size_t i = 0; i < MAX_ELEMENTS; ++i) { + ui32 mask = 1UL << i; + if (mask & RepeatedMask) { + Repeated[i]->Save(stream, step + offset); + } else if (mask & ScalarMask) { + stream << step << 
offset << "scalar " << ChildName[i] << " id " << i; + stream << " default " << (Default[i].Type == TScalarDefaultValue::None ? " no " : " const "); + if (Default[i].Type == TScalarDefaultValue::Fixed) { + stream << Default[i].Value; + } + stream << Endl; + stream << step << offset << "table " + << "id " << i << Endl; + Scalar[i].Save(stream, step + offset); + } + } + stream << offset << "end" << Endl; + } + + void Load(IInputStream& stream) { + TString name; + stream >> name; + if (name == "repeated") { + stream >> name; + } + Name = name; + stream >> name; + Y_ENSURE(name == "id", "Name mismatch: " << name.Quote() << " != id. "); + stream >> Id; + + while (1) { + stream >> name; + if (name == "index") { + Index.Load(stream); + } else if (name == "count") { + Count.Load(stream); + } else if (name == "mask") { + Mask.Load(stream); + } else if (name == "table") { + stream >> name; + Y_ENSURE(name == "id", "Name mismatch: " << name.Quote() << " != id. "); + size_t id; + stream >> id; + Scalar[id].Load(stream); + } else if (name == "repeated") { + THolder<TMetaInfo> info(new TMetaInfo(this)); + info->Load(stream); + size_t id = info->Id; + RepeatedMask |= Check(id); + Repeated[id].Reset(info.Release()); + } else if (name == "scalar") { + stream >> name; + TString childName = name; + stream >> name; + Y_ENSURE(name == "id", "Name mismatch: " << name.Quote() << " != id. "); + size_t id; + stream >> id; + ScalarMask |= Check(id); + ChildName[id] = childName; + stream >> name; + Y_ENSURE(name == "default", "Name mismatch: " << name.Quote() << " != default. "); + stream >> name; + if (name == "no") { + Default[id].Type = TScalarDefaultValue::None; + } else if (name == "const") { + ui32 def; + stream >> def; + Default[id].Type = TScalarDefaultValue::Fixed; + Default[id].Value = def; + } else { + ythrow yexception() << "Unsupported default value specification: " << name.Quote(); + } + } else if (name == "end" /* || stream.IsEOF()*/) { + return; + } + } + } + }; + +} diff --git a/library/cpp/compproto/ut/ya.make b/library/cpp/compproto/ut/ya.make new file mode 100644 index 00000000000..f197a582690 --- /dev/null +++ b/library/cpp/compproto/ut/ya.make @@ -0,0 +1,15 @@ +UNITTEST() + +OWNER(ironpeter) + +PEERDIR( + ADDINCL library/cpp/compproto +) + +SRCDIR(library/cpp/compproto) + +SRCS( + compproto_ut.cpp +) + +END() diff --git a/library/cpp/compproto/ya.make b/library/cpp/compproto/ya.make new file mode 100644 index 00000000000..60d5cfa08d3 --- /dev/null +++ b/library/cpp/compproto/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +OWNER(ironpeter) + +SRCS( + bit.h + compressor.h + huff.h + metainfo.h + lib.cpp +) + +END() |